From e93a70889b13901394a65116eaa93c9fc92b491c Mon Sep 17 00:00:00 2001 From: Maxim Mineev Date: Mon, 25 Nov 2024 22:23:32 +0500 Subject: [PATCH 1/7] =?UTF-8?q?=D0=91=D0=B0=D0=B7=D0=BE=D0=B2=D0=B0=D1=8F?= =?UTF-8?q?=20=D0=B0=D1=80=D1=85=D0=B8=D1=82=D0=B5=D0=BA=D1=82=D1=83=D1=80?= =?UTF-8?q?=D0=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 1 + cs/Markdown/Enums/MarkdownTokenName.cs | 9 +++++++++ cs/Markdown/HtmlParser.cs | 18 ++++++++++++++++++ cs/Markdown/Interfaces/ILexer.cs | 6 ++++++ cs/Markdown/Interfaces/IMarkdownTokenType.cs | 8 ++++++++ .../Interfaces/IPairedMarkdownTokenType.cs | 7 +++++++ cs/Markdown/Interfaces/IParser.cs | 6 ++++++ .../Interfaces/ISingleMarkdownTokenType.cs | 6 ++++++ cs/Markdown/Interfaces/IToken.cs | 9 +++++++++ cs/Markdown/Markdown.csproj | 10 ++++++++++ cs/Markdown/MarkdownLexer.cs | 12 ++++++++++++ cs/Markdown/MarkdownToHtmlConverter.cs | 13 +++++++++++++ cs/Markdown/MarkdownToken.cs | 11 +++++++++++ cs/Markdown/Program.cs | 9 +++++++++ .../TokenTypes/BoldMarkdownTokenType.cs | 11 +++++++++++ .../TokenTypes/HeaderMarkdownTokenType.cs | 9 +++++++++ .../TokenTypes/ItalicMarkdownTokenType.cs | 11 +++++++++++ .../TokenTypes/TextMarkdownTokenType.cs | 8 ++++++++ cs/MarkdownTests/MarkdownTests.csproj | 10 ++++++++++ cs/MarkdownTests/Program.cs | 9 +++++++++ cs/clean-code.sln | 12 ++++++++++++ 21 files changed, 195 insertions(+) create mode 100644 cs/Markdown/Enums/MarkdownTokenName.cs create mode 100644 cs/Markdown/HtmlParser.cs create mode 100644 cs/Markdown/Interfaces/ILexer.cs create mode 100644 cs/Markdown/Interfaces/IMarkdownTokenType.cs create mode 100644 cs/Markdown/Interfaces/IPairedMarkdownTokenType.cs create mode 100644 cs/Markdown/Interfaces/IParser.cs create mode 100644 cs/Markdown/Interfaces/ISingleMarkdownTokenType.cs create mode 100644 cs/Markdown/Interfaces/IToken.cs create mode 100644 cs/Markdown/Markdown.csproj create mode 100644 cs/Markdown/MarkdownLexer.cs create mode 100644 cs/Markdown/MarkdownToHtmlConverter.cs create mode 100644 cs/Markdown/MarkdownToken.cs create mode 100644 cs/Markdown/Program.cs create mode 100644 cs/Markdown/TokenTypes/BoldMarkdownTokenType.cs create mode 100644 cs/Markdown/TokenTypes/HeaderMarkdownTokenType.cs create mode 100644 cs/Markdown/TokenTypes/ItalicMarkdownTokenType.cs create mode 100644 cs/Markdown/TokenTypes/TextMarkdownTokenType.cs create mode 100644 cs/MarkdownTests/MarkdownTests.csproj create mode 100644 cs/MarkdownTests/Program.cs diff --git a/.gitignore b/.gitignore index eaadbddaf..3fffa6f91 100644 --- a/.gitignore +++ b/.gitignore @@ -239,3 +239,4 @@ _Pvt_Extensions **/.idea **/.vscode **/node_modules +/cs/Markdown/Markdown.csproj.DotSettings diff --git a/cs/Markdown/Enums/MarkdownTokenName.cs b/cs/Markdown/Enums/MarkdownTokenName.cs new file mode 100644 index 000000000..34fd7494b --- /dev/null +++ b/cs/Markdown/Enums/MarkdownTokenName.cs @@ -0,0 +1,9 @@ +namespace Markdown.Enums; + +public enum MarkdownTokenName +{ + Text, + Header, + Bold, + Italic +} \ No newline at end of file diff --git a/cs/Markdown/HtmlParser.cs b/cs/Markdown/HtmlParser.cs new file mode 100644 index 000000000..498938e22 --- /dev/null +++ b/cs/Markdown/HtmlParser.cs @@ -0,0 +1,18 @@ +using Markdown.Enums; + +namespace Markdown; + +public class HtmlParser : IParser +{ + private static readonly Dictionary TokenToHtmlTag = new () + { + { MarkdownTokenName.Bold, "strong" }, + { MarkdownTokenName.Italic, "em" }, + { MarkdownTokenName.Header, "h1" } + }; + + public string Parse(IEnumerable tokens) + { + throw new NotImplementedException(); + } +} \ No newline at end of file diff --git a/cs/Markdown/Interfaces/ILexer.cs b/cs/Markdown/Interfaces/ILexer.cs new file mode 100644 index 000000000..e13406f0b --- /dev/null +++ b/cs/Markdown/Interfaces/ILexer.cs @@ -0,0 +1,6 @@ +namespace Markdown; + +public interface ILexer +{ + IEnumerable Tokenize(string input); +} \ No newline at end of file diff --git a/cs/Markdown/Interfaces/IMarkdownTokenType.cs b/cs/Markdown/Interfaces/IMarkdownTokenType.cs new file mode 100644 index 000000000..14f297370 --- /dev/null +++ b/cs/Markdown/Interfaces/IMarkdownTokenType.cs @@ -0,0 +1,8 @@ +using Markdown.Enums; + +namespace Markdown; + +public interface IMarkdownTokenType +{ + MarkdownTokenName Name { get; } +} \ No newline at end of file diff --git a/cs/Markdown/Interfaces/IPairedMarkdownTokenType.cs b/cs/Markdown/Interfaces/IPairedMarkdownTokenType.cs new file mode 100644 index 000000000..896446163 --- /dev/null +++ b/cs/Markdown/Interfaces/IPairedMarkdownTokenType.cs @@ -0,0 +1,7 @@ +namespace Markdown; + +public interface IPairedMarkdownTokenType : IMarkdownTokenType +{ + string CloseTag { get; } + bool CanBeWithoutCloseTag { get; } +} \ No newline at end of file diff --git a/cs/Markdown/Interfaces/IParser.cs b/cs/Markdown/Interfaces/IParser.cs new file mode 100644 index 000000000..e6f73bc06 --- /dev/null +++ b/cs/Markdown/Interfaces/IParser.cs @@ -0,0 +1,6 @@ +namespace Markdown; + +public interface IParser +{ + string Parse(IEnumerable tokens); +} \ No newline at end of file diff --git a/cs/Markdown/Interfaces/ISingleMarkdownTokenType.cs b/cs/Markdown/Interfaces/ISingleMarkdownTokenType.cs new file mode 100644 index 000000000..2e1e6a4cb --- /dev/null +++ b/cs/Markdown/Interfaces/ISingleMarkdownTokenType.cs @@ -0,0 +1,6 @@ +namespace Markdown; + +public interface ISingleMarkdownTokenType : IMarkdownTokenType +{ + string OpenTag { get; } +} \ No newline at end of file diff --git a/cs/Markdown/Interfaces/IToken.cs b/cs/Markdown/Interfaces/IToken.cs new file mode 100644 index 000000000..0c11513ac --- /dev/null +++ b/cs/Markdown/Interfaces/IToken.cs @@ -0,0 +1,9 @@ +namespace Markdown; + +public interface IToken +{ + int Position { get; } + int Length { get; } + string Value { get; } + int GetIndexToNextToken(); +} \ No newline at end of file diff --git a/cs/Markdown/Markdown.csproj b/cs/Markdown/Markdown.csproj new file mode 100644 index 000000000..2f4fc7765 --- /dev/null +++ b/cs/Markdown/Markdown.csproj @@ -0,0 +1,10 @@ + + + + Exe + net8.0 + enable + enable + + + diff --git a/cs/Markdown/MarkdownLexer.cs b/cs/Markdown/MarkdownLexer.cs new file mode 100644 index 000000000..9d7b6387f --- /dev/null +++ b/cs/Markdown/MarkdownLexer.cs @@ -0,0 +1,12 @@ +namespace Markdown; + +public class MarkdownLexer : ILexer +{ + public int Position { get; private set; } + private List Tokens { get; } = []; + + public IEnumerable Tokenize(string input) + { + throw new NotImplementedException(); + } +} \ No newline at end of file diff --git a/cs/Markdown/MarkdownToHtmlConverter.cs b/cs/Markdown/MarkdownToHtmlConverter.cs new file mode 100644 index 000000000..6130bb8ab --- /dev/null +++ b/cs/Markdown/MarkdownToHtmlConverter.cs @@ -0,0 +1,13 @@ +namespace Markdown; + +public class MarkdownToHtmlConverter(ILexer lexer, IParser parser) +{ + public ILexer Lexer { get; } = lexer; + public IParser Parser { get; } = parser; + + public string Convert(string input) + { + var tokens = Lexer.Tokenize(input); + return Parser.Parse(tokens); + } +} \ No newline at end of file diff --git a/cs/Markdown/MarkdownToken.cs b/cs/Markdown/MarkdownToken.cs new file mode 100644 index 000000000..8705b10de --- /dev/null +++ b/cs/Markdown/MarkdownToken.cs @@ -0,0 +1,11 @@ +namespace Markdown; + +public class MarkdownToken(int position, string value, IMarkdownTokenType type) : IToken +{ + public int Position { get; } = position; + public string Value { get; } = value; + public int Length => Value.Length; + public IMarkdownTokenType Type { get; } = type; + + public int GetIndexToNextToken() => Position + Length; +} \ No newline at end of file diff --git a/cs/Markdown/Program.cs b/cs/Markdown/Program.cs new file mode 100644 index 000000000..525eda88d --- /dev/null +++ b/cs/Markdown/Program.cs @@ -0,0 +1,9 @@ +namespace Markdown; + +class Program +{ + static void Main(string[] args) + { + Console.WriteLine("Hello, World!"); + } +} \ No newline at end of file diff --git a/cs/Markdown/TokenTypes/BoldMarkdownTokenType.cs b/cs/Markdown/TokenTypes/BoldMarkdownTokenType.cs new file mode 100644 index 000000000..fbdd81092 --- /dev/null +++ b/cs/Markdown/TokenTypes/BoldMarkdownTokenType.cs @@ -0,0 +1,11 @@ +using Markdown.Enums; + +namespace Markdown.TokenTypes; + +public class BoldMarkdownTokenType : IPairedMarkdownTokenType +{ + public MarkdownTokenName Name => MarkdownTokenName.Bold; + public string OpenTag => "__"; + public string CloseTag => "__"; + public bool CanBeWithoutCloseTag => false; +} \ No newline at end of file diff --git a/cs/Markdown/TokenTypes/HeaderMarkdownTokenType.cs b/cs/Markdown/TokenTypes/HeaderMarkdownTokenType.cs new file mode 100644 index 000000000..9e1a703f6 --- /dev/null +++ b/cs/Markdown/TokenTypes/HeaderMarkdownTokenType.cs @@ -0,0 +1,9 @@ +using Markdown.Enums; + +namespace Markdown.TokenTypes; + +public class HeaderMarkdownTokenType : ISingleMarkdownTokenType +{ + public MarkdownTokenName Name => MarkdownTokenName.Header; + public string OpenTag => "# "; +} \ No newline at end of file diff --git a/cs/Markdown/TokenTypes/ItalicMarkdownTokenType.cs b/cs/Markdown/TokenTypes/ItalicMarkdownTokenType.cs new file mode 100644 index 000000000..e0949b964 --- /dev/null +++ b/cs/Markdown/TokenTypes/ItalicMarkdownTokenType.cs @@ -0,0 +1,11 @@ +using Markdown.Enums; + +namespace Markdown.TokenTypes; + +public class ItalicMarkdownTokenType : IPairedMarkdownTokenType +{ + public MarkdownTokenName Name => MarkdownTokenName.Italic; + public string OpenTag => "_"; + public string CloseTag => "_"; + public bool CanBeWithoutCloseTag => false; +} \ No newline at end of file diff --git a/cs/Markdown/TokenTypes/TextMarkdownTokenType.cs b/cs/Markdown/TokenTypes/TextMarkdownTokenType.cs new file mode 100644 index 000000000..c22dccf9e --- /dev/null +++ b/cs/Markdown/TokenTypes/TextMarkdownTokenType.cs @@ -0,0 +1,8 @@ +using Markdown.Enums; + +namespace Markdown.TokenTypes; + +public class TextMarkdownTokenType : IMarkdownTokenType +{ + public MarkdownTokenName Name => MarkdownTokenName.Text; +} \ No newline at end of file diff --git a/cs/MarkdownTests/MarkdownTests.csproj b/cs/MarkdownTests/MarkdownTests.csproj new file mode 100644 index 000000000..e3d362c0e --- /dev/null +++ b/cs/MarkdownTests/MarkdownTests.csproj @@ -0,0 +1,10 @@ + + + + net8.0 + enable + enable + Exe + + + diff --git a/cs/MarkdownTests/Program.cs b/cs/MarkdownTests/Program.cs new file mode 100644 index 000000000..54d354847 --- /dev/null +++ b/cs/MarkdownTests/Program.cs @@ -0,0 +1,9 @@ +namespace MarkdownTests; + +class Program +{ + static void Main(string[] args) + { + Console.WriteLine("Hello, World!"); + } +} \ No newline at end of file diff --git a/cs/clean-code.sln b/cs/clean-code.sln index 2206d54db..e221462d2 100644 --- a/cs/clean-code.sln +++ b/cs/clean-code.sln @@ -9,6 +9,10 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ControlDigit", "ControlDigi EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Samples", "Samples\Samples.csproj", "{C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Markdown", "Markdown\Markdown.csproj", "{05E17013-A047-4ACD-ABC6-C3777A6AFCEC}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MarkdownTests", "MarkdownTests\MarkdownTests.csproj", "{B647A53B-1176-4E76-9043-550EFB336372}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -27,5 +31,13 @@ Global {C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}.Debug|Any CPU.Build.0 = Debug|Any CPU {C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}.Release|Any CPU.ActiveCfg = Release|Any CPU {C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}.Release|Any CPU.Build.0 = Release|Any CPU + {05E17013-A047-4ACD-ABC6-C3777A6AFCEC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {05E17013-A047-4ACD-ABC6-C3777A6AFCEC}.Debug|Any CPU.Build.0 = Debug|Any CPU + {05E17013-A047-4ACD-ABC6-C3777A6AFCEC}.Release|Any CPU.ActiveCfg = Release|Any CPU + {05E17013-A047-4ACD-ABC6-C3777A6AFCEC}.Release|Any CPU.Build.0 = Release|Any CPU + {B647A53B-1176-4E76-9043-550EFB336372}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {B647A53B-1176-4E76-9043-550EFB336372}.Debug|Any CPU.Build.0 = Debug|Any CPU + {B647A53B-1176-4E76-9043-550EFB336372}.Release|Any CPU.ActiveCfg = Release|Any CPU + {B647A53B-1176-4E76-9043-550EFB336372}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection EndGlobal From c47ccc826d622fbffc72361cc834912e45d25a40 Mon Sep 17 00:00:00 2001 From: Maxim Mineev Date: Fri, 29 Nov 2024 20:46:14 +0500 Subject: [PATCH 2/7] =?UTF-8?q?=D0=94=D0=BE=D1=80=D0=B0=D0=B1=D0=BE=D1=82?= =?UTF-8?q?=D0=BA=D0=B0=20=D0=B0=D1=80=D1=85=D0=B8=D1=82=D0=B5=D0=BA=D1=82?= =?UTF-8?q?=D1=83=D1=80=D1=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cs/Markdown/AstNodes/BoldMarkdownNode.cs | 6 +++++ cs/Markdown/AstNodes/HeadingMarkdownNode.cs | 6 +++++ .../AstNodes/IMarkdownNodeWithChildren.cs | 6 +++++ cs/Markdown/AstNodes/ItalicMarkdownNode.cs | 6 +++++ cs/Markdown/AstNodes/MarkdownNode.cs | 6 +++++ cs/Markdown/AstNodes/TextMarkdownNode.cs | 5 ++++ cs/Markdown/Enums/MarkdownTokenName.cs | 5 ++-- cs/Markdown/HtmlParser.cs | 2 +- cs/Markdown/MarkdownLexer.cs | 23 +++++++++++++++++++ cs/Markdown/MarkdownToken.cs | 6 +++-- cs/Markdown/TokenTypes/BoldMarkdownToken.cs | 11 +++++++++ .../TokenTypes/BoldMarkdownTokenType.cs | 11 --------- .../TokenTypes/HeaderMarkdownTokenType.cs | 9 -------- .../TokenTypes/HeadingMarkdownToken.cs | 9 ++++++++ cs/Markdown/TokenTypes/ItalicMarkdownToken.cs | 11 +++++++++ .../TokenTypes/ItalicMarkdownTokenType.cs | 11 --------- cs/Markdown/TokenTypes/TextMarkdownToken.cs | 8 +++++++ .../TokenTypes/TextMarkdownTokenType.cs | 8 ------- 18 files changed, 105 insertions(+), 44 deletions(-) create mode 100644 cs/Markdown/AstNodes/BoldMarkdownNode.cs create mode 100644 cs/Markdown/AstNodes/HeadingMarkdownNode.cs create mode 100644 cs/Markdown/AstNodes/IMarkdownNodeWithChildren.cs create mode 100644 cs/Markdown/AstNodes/ItalicMarkdownNode.cs create mode 100644 cs/Markdown/AstNodes/MarkdownNode.cs create mode 100644 cs/Markdown/AstNodes/TextMarkdownNode.cs create mode 100644 cs/Markdown/TokenTypes/BoldMarkdownToken.cs delete mode 100644 cs/Markdown/TokenTypes/BoldMarkdownTokenType.cs delete mode 100644 cs/Markdown/TokenTypes/HeaderMarkdownTokenType.cs create mode 100644 cs/Markdown/TokenTypes/HeadingMarkdownToken.cs create mode 100644 cs/Markdown/TokenTypes/ItalicMarkdownToken.cs delete mode 100644 cs/Markdown/TokenTypes/ItalicMarkdownTokenType.cs create mode 100644 cs/Markdown/TokenTypes/TextMarkdownToken.cs delete mode 100644 cs/Markdown/TokenTypes/TextMarkdownTokenType.cs diff --git a/cs/Markdown/AstNodes/BoldMarkdownNode.cs b/cs/Markdown/AstNodes/BoldMarkdownNode.cs new file mode 100644 index 000000000..584e58468 --- /dev/null +++ b/cs/Markdown/AstNodes/BoldMarkdownNode.cs @@ -0,0 +1,6 @@ +namespace Markdown.AstNodes; + +public class BoldMarkdownNode(string content) : MarkdownNode(content), IMarkdownNodeWithChildren +{ + public List Children { get; } = []; +} \ No newline at end of file diff --git a/cs/Markdown/AstNodes/HeadingMarkdownNode.cs b/cs/Markdown/AstNodes/HeadingMarkdownNode.cs new file mode 100644 index 000000000..c959cbd6a --- /dev/null +++ b/cs/Markdown/AstNodes/HeadingMarkdownNode.cs @@ -0,0 +1,6 @@ +namespace Markdown.AstNodes; + +public class HeadingMarkdownNode(string content) : MarkdownNode(content), IMarkdownNodeWithChildren +{ + public List Children { get; } = []; +} \ No newline at end of file diff --git a/cs/Markdown/AstNodes/IMarkdownNodeWithChildren.cs b/cs/Markdown/AstNodes/IMarkdownNodeWithChildren.cs new file mode 100644 index 000000000..ca1e5d39c --- /dev/null +++ b/cs/Markdown/AstNodes/IMarkdownNodeWithChildren.cs @@ -0,0 +1,6 @@ +namespace Markdown.AstNodes; + +public interface IMarkdownNodeWithChildren +{ + public List Children { get; } +} \ No newline at end of file diff --git a/cs/Markdown/AstNodes/ItalicMarkdownNode.cs b/cs/Markdown/AstNodes/ItalicMarkdownNode.cs new file mode 100644 index 000000000..045f47dbf --- /dev/null +++ b/cs/Markdown/AstNodes/ItalicMarkdownNode.cs @@ -0,0 +1,6 @@ +namespace Markdown.AstNodes; + +public class ItalicMarkdownNode(string content) : MarkdownNode(content), IMarkdownNodeWithChildren +{ + public List Children { get; } = []; +} \ No newline at end of file diff --git a/cs/Markdown/AstNodes/MarkdownNode.cs b/cs/Markdown/AstNodes/MarkdownNode.cs new file mode 100644 index 000000000..a77f8949e --- /dev/null +++ b/cs/Markdown/AstNodes/MarkdownNode.cs @@ -0,0 +1,6 @@ +namespace Markdown.AstNodes; + +public abstract class MarkdownNode(string content) +{ + public string Content { get; } = content; +} \ No newline at end of file diff --git a/cs/Markdown/AstNodes/TextMarkdownNode.cs b/cs/Markdown/AstNodes/TextMarkdownNode.cs new file mode 100644 index 000000000..3be99dc4e --- /dev/null +++ b/cs/Markdown/AstNodes/TextMarkdownNode.cs @@ -0,0 +1,5 @@ +namespace Markdown.AstNodes; + +public class TextMarkdownNode(string content) : MarkdownNode(content) +{ +} \ No newline at end of file diff --git a/cs/Markdown/Enums/MarkdownTokenName.cs b/cs/Markdown/Enums/MarkdownTokenName.cs index 34fd7494b..d73bd3973 100644 --- a/cs/Markdown/Enums/MarkdownTokenName.cs +++ b/cs/Markdown/Enums/MarkdownTokenName.cs @@ -3,7 +3,8 @@ namespace Markdown.Enums; public enum MarkdownTokenName { Text, - Header, + Heading, Bold, - Italic + Italic, + Root, } \ No newline at end of file diff --git a/cs/Markdown/HtmlParser.cs b/cs/Markdown/HtmlParser.cs index 498938e22..484bccd90 100644 --- a/cs/Markdown/HtmlParser.cs +++ b/cs/Markdown/HtmlParser.cs @@ -8,7 +8,7 @@ public class HtmlParser : IParser { { MarkdownTokenName.Bold, "strong" }, { MarkdownTokenName.Italic, "em" }, - { MarkdownTokenName.Header, "h1" } + { MarkdownTokenName.Heading, "h1" } }; public string Parse(IEnumerable tokens) diff --git a/cs/Markdown/MarkdownLexer.cs b/cs/Markdown/MarkdownLexer.cs index 9d7b6387f..ac84f4085 100644 --- a/cs/Markdown/MarkdownLexer.cs +++ b/cs/Markdown/MarkdownLexer.cs @@ -1,12 +1,35 @@ +using Markdown.TokenTypes; + namespace Markdown; public class MarkdownLexer : ILexer { public int Position { get; private set; } private List Tokens { get; } = []; + private int position; public IEnumerable Tokenize(string input) { throw new NotImplementedException(); } + + private BoldMarkdownToken? TryParseBold(string input) + { + throw new NotImplementedException(); + } + + private ItalicMarkdownToken? TryParseItalic(string input) + { + throw new NotImplementedException(); + } + + private HeadingMarkdownToken? TryParseHeading(string input) + { + throw new NotImplementedException(); + } + + private TextMarkdownToken? TryParseText(string input) + { + throw new NotImplementedException(); + } } \ No newline at end of file diff --git a/cs/Markdown/MarkdownToken.cs b/cs/Markdown/MarkdownToken.cs index 8705b10de..c61ea8a0d 100644 --- a/cs/Markdown/MarkdownToken.cs +++ b/cs/Markdown/MarkdownToken.cs @@ -1,11 +1,13 @@ +using Markdown.Enums; + namespace Markdown; -public class MarkdownToken(int position, string value, IMarkdownTokenType type) : IToken +public abstract class MarkdownToken(int position, string value) : IToken { public int Position { get; } = position; public string Value { get; } = value; public int Length => Value.Length; - public IMarkdownTokenType Type { get; } = type; + public virtual MarkdownTokenName Name { get; } public int GetIndexToNextToken() => Position + Length; } \ No newline at end of file diff --git a/cs/Markdown/TokenTypes/BoldMarkdownToken.cs b/cs/Markdown/TokenTypes/BoldMarkdownToken.cs new file mode 100644 index 000000000..5e2e1b3c8 --- /dev/null +++ b/cs/Markdown/TokenTypes/BoldMarkdownToken.cs @@ -0,0 +1,11 @@ +using Markdown.Enums; + +namespace Markdown.TokenTypes; + +public class BoldMarkdownToken(int position, string value) : MarkdownToken(position, value), IPairedMarkdownTokenType +{ + public override MarkdownTokenName Name => MarkdownTokenName.Bold; + public string OpenTag => "__"; + public string CloseTag => "__"; + public bool CanBeWithoutCloseTag => false; +} \ No newline at end of file diff --git a/cs/Markdown/TokenTypes/BoldMarkdownTokenType.cs b/cs/Markdown/TokenTypes/BoldMarkdownTokenType.cs deleted file mode 100644 index fbdd81092..000000000 --- a/cs/Markdown/TokenTypes/BoldMarkdownTokenType.cs +++ /dev/null @@ -1,11 +0,0 @@ -using Markdown.Enums; - -namespace Markdown.TokenTypes; - -public class BoldMarkdownTokenType : IPairedMarkdownTokenType -{ - public MarkdownTokenName Name => MarkdownTokenName.Bold; - public string OpenTag => "__"; - public string CloseTag => "__"; - public bool CanBeWithoutCloseTag => false; -} \ No newline at end of file diff --git a/cs/Markdown/TokenTypes/HeaderMarkdownTokenType.cs b/cs/Markdown/TokenTypes/HeaderMarkdownTokenType.cs deleted file mode 100644 index 9e1a703f6..000000000 --- a/cs/Markdown/TokenTypes/HeaderMarkdownTokenType.cs +++ /dev/null @@ -1,9 +0,0 @@ -using Markdown.Enums; - -namespace Markdown.TokenTypes; - -public class HeaderMarkdownTokenType : ISingleMarkdownTokenType -{ - public MarkdownTokenName Name => MarkdownTokenName.Header; - public string OpenTag => "# "; -} \ No newline at end of file diff --git a/cs/Markdown/TokenTypes/HeadingMarkdownToken.cs b/cs/Markdown/TokenTypes/HeadingMarkdownToken.cs new file mode 100644 index 000000000..5717bfa47 --- /dev/null +++ b/cs/Markdown/TokenTypes/HeadingMarkdownToken.cs @@ -0,0 +1,9 @@ +using Markdown.Enums; + +namespace Markdown.TokenTypes; + +public class HeadingMarkdownToken(int position, string value) : MarkdownToken(position, value), ISingleMarkdownTokenType +{ + public override MarkdownTokenName Name => MarkdownTokenName.Heading; + public string OpenTag => "# "; +} \ No newline at end of file diff --git a/cs/Markdown/TokenTypes/ItalicMarkdownToken.cs b/cs/Markdown/TokenTypes/ItalicMarkdownToken.cs new file mode 100644 index 000000000..705c1e189 --- /dev/null +++ b/cs/Markdown/TokenTypes/ItalicMarkdownToken.cs @@ -0,0 +1,11 @@ +using Markdown.Enums; + +namespace Markdown.TokenTypes; + +public class ItalicMarkdownToken(int position, string value) : MarkdownToken(position, value), IPairedMarkdownTokenType +{ + public override MarkdownTokenName Name => MarkdownTokenName.Italic; + public string OpenTag => "_"; + public string CloseTag => "_"; + public bool CanBeWithoutCloseTag => false; +} \ No newline at end of file diff --git a/cs/Markdown/TokenTypes/ItalicMarkdownTokenType.cs b/cs/Markdown/TokenTypes/ItalicMarkdownTokenType.cs deleted file mode 100644 index e0949b964..000000000 --- a/cs/Markdown/TokenTypes/ItalicMarkdownTokenType.cs +++ /dev/null @@ -1,11 +0,0 @@ -using Markdown.Enums; - -namespace Markdown.TokenTypes; - -public class ItalicMarkdownTokenType : IPairedMarkdownTokenType -{ - public MarkdownTokenName Name => MarkdownTokenName.Italic; - public string OpenTag => "_"; - public string CloseTag => "_"; - public bool CanBeWithoutCloseTag => false; -} \ No newline at end of file diff --git a/cs/Markdown/TokenTypes/TextMarkdownToken.cs b/cs/Markdown/TokenTypes/TextMarkdownToken.cs new file mode 100644 index 000000000..e073870df --- /dev/null +++ b/cs/Markdown/TokenTypes/TextMarkdownToken.cs @@ -0,0 +1,8 @@ +using Markdown.Enums; + +namespace Markdown.TokenTypes; + +public class TextMarkdownToken(int position, string value) : MarkdownToken(position, value), IMarkdownTokenType +{ + public override MarkdownTokenName Name => MarkdownTokenName.Text; +} \ No newline at end of file diff --git a/cs/Markdown/TokenTypes/TextMarkdownTokenType.cs b/cs/Markdown/TokenTypes/TextMarkdownTokenType.cs deleted file mode 100644 index c22dccf9e..000000000 --- a/cs/Markdown/TokenTypes/TextMarkdownTokenType.cs +++ /dev/null @@ -1,8 +0,0 @@ -using Markdown.Enums; - -namespace Markdown.TokenTypes; - -public class TextMarkdownTokenType : IMarkdownTokenType -{ - public MarkdownTokenName Name => MarkdownTokenName.Text; -} \ No newline at end of file From 4452683fb09f5a2d1963dad08a4301f131b0c761 Mon Sep 17 00:00:00 2001 From: Maxim Mineev Date: Sun, 1 Dec 2024 21:24:16 +0500 Subject: [PATCH 3/7] Lexer --- cs/Markdown/AstNodes/BoldMarkdownNode.cs | 4 + cs/Markdown/AstNodes/HeadingMarkdownNode.cs | 3 + cs/Markdown/AstNodes/ItalicMarkdownNode.cs | 3 + cs/Markdown/AstNodes/MarkdownNode.cs | 3 + cs/Markdown/AstNodes/RootMarkdownNode.cs | 9 + cs/Markdown/AstNodes/TextMarkdownNode.cs | 3 + cs/Markdown/Enums/MarkdownNodeName.cs | 10 ++ cs/Markdown/Enums/MarkdownTokenName.cs | 10 +- cs/Markdown/HtmlParser.cs | 18 -- cs/Markdown/Interfaces/ILexer.cs | 2 +- cs/Markdown/Interfaces/IMarkdownTokenType.cs | 8 - .../Interfaces/IPairedMarkdownTokenType.cs | 7 - cs/Markdown/Interfaces/IParser.cs | 4 +- .../Interfaces/ISingleMarkdownTokenType.cs | 6 - cs/Markdown/Interfaces/IToken.cs | 3 + cs/Markdown/MarkdownLexer.cs | 157 ++++++++++++++++-- cs/Markdown/MarkdownToHtmlConverter.cs | 11 +- cs/Markdown/MarkdownToken.cs | 13 -- cs/Markdown/TokenTypes/BoldMarkdownToken.cs | 11 -- .../TokenTypes/HeadingMarkdownToken.cs | 9 - cs/Markdown/TokenTypes/ItalicMarkdownToken.cs | 11 -- cs/Markdown/TokenTypes/TextMarkdownToken.cs | 8 - cs/Markdown/Tokens/BoldToken.cs | 9 + cs/Markdown/Tokens/HeadingToken.cs | 9 + cs/Markdown/Tokens/ItalicToken.cs | 9 + cs/Markdown/Tokens/NewLineToken.cs | 9 + cs/Markdown/Tokens/NumberToken.cs | 9 + cs/Markdown/Tokens/SpaceToken.cs | 9 + cs/Markdown/Tokens/TextToken.cs | 9 + cs/Markdown/Tokens/Token.cs | 18 ++ cs/MarkdownTests/LexerTests.cs | 99 +++++++++++ cs/MarkdownTests/MarkdownTests.csproj | 10 ++ cs/MarkdownTests/Program.cs | 9 - 33 files changed, 388 insertions(+), 124 deletions(-) create mode 100644 cs/Markdown/AstNodes/RootMarkdownNode.cs create mode 100644 cs/Markdown/Enums/MarkdownNodeName.cs delete mode 100644 cs/Markdown/HtmlParser.cs delete mode 100644 cs/Markdown/Interfaces/IMarkdownTokenType.cs delete mode 100644 cs/Markdown/Interfaces/IPairedMarkdownTokenType.cs delete mode 100644 cs/Markdown/Interfaces/ISingleMarkdownTokenType.cs delete mode 100644 cs/Markdown/MarkdownToken.cs delete mode 100644 cs/Markdown/TokenTypes/BoldMarkdownToken.cs delete mode 100644 cs/Markdown/TokenTypes/HeadingMarkdownToken.cs delete mode 100644 cs/Markdown/TokenTypes/ItalicMarkdownToken.cs delete mode 100644 cs/Markdown/TokenTypes/TextMarkdownToken.cs create mode 100644 cs/Markdown/Tokens/BoldToken.cs create mode 100644 cs/Markdown/Tokens/HeadingToken.cs create mode 100644 cs/Markdown/Tokens/ItalicToken.cs create mode 100644 cs/Markdown/Tokens/NewLineToken.cs create mode 100644 cs/Markdown/Tokens/NumberToken.cs create mode 100644 cs/Markdown/Tokens/SpaceToken.cs create mode 100644 cs/Markdown/Tokens/TextToken.cs create mode 100644 cs/Markdown/Tokens/Token.cs create mode 100644 cs/MarkdownTests/LexerTests.cs delete mode 100644 cs/MarkdownTests/Program.cs diff --git a/cs/Markdown/AstNodes/BoldMarkdownNode.cs b/cs/Markdown/AstNodes/BoldMarkdownNode.cs index 584e58468..b73ca44e3 100644 --- a/cs/Markdown/AstNodes/BoldMarkdownNode.cs +++ b/cs/Markdown/AstNodes/BoldMarkdownNode.cs @@ -1,6 +1,10 @@ +using System.Text; +using Markdown.Enums; + namespace Markdown.AstNodes; public class BoldMarkdownNode(string content) : MarkdownNode(content), IMarkdownNodeWithChildren { + public override MarkdownNodeName Type => MarkdownNodeName.Bold; public List Children { get; } = []; } \ No newline at end of file diff --git a/cs/Markdown/AstNodes/HeadingMarkdownNode.cs b/cs/Markdown/AstNodes/HeadingMarkdownNode.cs index c959cbd6a..028ca540c 100644 --- a/cs/Markdown/AstNodes/HeadingMarkdownNode.cs +++ b/cs/Markdown/AstNodes/HeadingMarkdownNode.cs @@ -1,6 +1,9 @@ +using Markdown.Enums; + namespace Markdown.AstNodes; public class HeadingMarkdownNode(string content) : MarkdownNode(content), IMarkdownNodeWithChildren { + public override MarkdownNodeName Type => MarkdownNodeName.Heading; public List Children { get; } = []; } \ No newline at end of file diff --git a/cs/Markdown/AstNodes/ItalicMarkdownNode.cs b/cs/Markdown/AstNodes/ItalicMarkdownNode.cs index 045f47dbf..deb6046f9 100644 --- a/cs/Markdown/AstNodes/ItalicMarkdownNode.cs +++ b/cs/Markdown/AstNodes/ItalicMarkdownNode.cs @@ -1,6 +1,9 @@ +using Markdown.Enums; + namespace Markdown.AstNodes; public class ItalicMarkdownNode(string content) : MarkdownNode(content), IMarkdownNodeWithChildren { + public override MarkdownNodeName Type => MarkdownNodeName.Italic; public List Children { get; } = []; } \ No newline at end of file diff --git a/cs/Markdown/AstNodes/MarkdownNode.cs b/cs/Markdown/AstNodes/MarkdownNode.cs index a77f8949e..8d1976fea 100644 --- a/cs/Markdown/AstNodes/MarkdownNode.cs +++ b/cs/Markdown/AstNodes/MarkdownNode.cs @@ -1,6 +1,9 @@ +using Markdown.Enums; + namespace Markdown.AstNodes; public abstract class MarkdownNode(string content) { + public abstract MarkdownNodeName Type { get; } public string Content { get; } = content; } \ No newline at end of file diff --git a/cs/Markdown/AstNodes/RootMarkdownNode.cs b/cs/Markdown/AstNodes/RootMarkdownNode.cs new file mode 100644 index 000000000..f463a33d8 --- /dev/null +++ b/cs/Markdown/AstNodes/RootMarkdownNode.cs @@ -0,0 +1,9 @@ +using Markdown.Enums; + +namespace Markdown.AstNodes; + +public class RootMarkdownNode(string content) : MarkdownNode(content), IMarkdownNodeWithChildren +{ + public override MarkdownNodeName Type => MarkdownNodeName.Root; + public List Children { get; } = []; +} \ No newline at end of file diff --git a/cs/Markdown/AstNodes/TextMarkdownNode.cs b/cs/Markdown/AstNodes/TextMarkdownNode.cs index 3be99dc4e..453eaa665 100644 --- a/cs/Markdown/AstNodes/TextMarkdownNode.cs +++ b/cs/Markdown/AstNodes/TextMarkdownNode.cs @@ -1,5 +1,8 @@ +using Markdown.Enums; + namespace Markdown.AstNodes; public class TextMarkdownNode(string content) : MarkdownNode(content) { + public override MarkdownNodeName Type => MarkdownNodeName.Text; } \ No newline at end of file diff --git a/cs/Markdown/Enums/MarkdownNodeName.cs b/cs/Markdown/Enums/MarkdownNodeName.cs new file mode 100644 index 000000000..6a59de752 --- /dev/null +++ b/cs/Markdown/Enums/MarkdownNodeName.cs @@ -0,0 +1,10 @@ +namespace Markdown.Enums; + +public enum MarkdownNodeName +{ + Bold, + Italic, + Heading, + Text, + Root, +} \ No newline at end of file diff --git a/cs/Markdown/Enums/MarkdownTokenName.cs b/cs/Markdown/Enums/MarkdownTokenName.cs index d73bd3973..2467027fc 100644 --- a/cs/Markdown/Enums/MarkdownTokenName.cs +++ b/cs/Markdown/Enums/MarkdownTokenName.cs @@ -2,9 +2,11 @@ namespace Markdown.Enums; public enum MarkdownTokenName { - Text, - Heading, - Bold, Italic, - Root, + Bold, + Heading, + Text, + NewLine, + Space, + Number, } \ No newline at end of file diff --git a/cs/Markdown/HtmlParser.cs b/cs/Markdown/HtmlParser.cs deleted file mode 100644 index 484bccd90..000000000 --- a/cs/Markdown/HtmlParser.cs +++ /dev/null @@ -1,18 +0,0 @@ -using Markdown.Enums; - -namespace Markdown; - -public class HtmlParser : IParser -{ - private static readonly Dictionary TokenToHtmlTag = new () - { - { MarkdownTokenName.Bold, "strong" }, - { MarkdownTokenName.Italic, "em" }, - { MarkdownTokenName.Heading, "h1" } - }; - - public string Parse(IEnumerable tokens) - { - throw new NotImplementedException(); - } -} \ No newline at end of file diff --git a/cs/Markdown/Interfaces/ILexer.cs b/cs/Markdown/Interfaces/ILexer.cs index e13406f0b..f1d4032ea 100644 --- a/cs/Markdown/Interfaces/ILexer.cs +++ b/cs/Markdown/Interfaces/ILexer.cs @@ -2,5 +2,5 @@ namespace Markdown; public interface ILexer { - IEnumerable Tokenize(string input); + List Tokenize(string input); } \ No newline at end of file diff --git a/cs/Markdown/Interfaces/IMarkdownTokenType.cs b/cs/Markdown/Interfaces/IMarkdownTokenType.cs deleted file mode 100644 index 14f297370..000000000 --- a/cs/Markdown/Interfaces/IMarkdownTokenType.cs +++ /dev/null @@ -1,8 +0,0 @@ -using Markdown.Enums; - -namespace Markdown; - -public interface IMarkdownTokenType -{ - MarkdownTokenName Name { get; } -} \ No newline at end of file diff --git a/cs/Markdown/Interfaces/IPairedMarkdownTokenType.cs b/cs/Markdown/Interfaces/IPairedMarkdownTokenType.cs deleted file mode 100644 index 896446163..000000000 --- a/cs/Markdown/Interfaces/IPairedMarkdownTokenType.cs +++ /dev/null @@ -1,7 +0,0 @@ -namespace Markdown; - -public interface IPairedMarkdownTokenType : IMarkdownTokenType -{ - string CloseTag { get; } - bool CanBeWithoutCloseTag { get; } -} \ No newline at end of file diff --git a/cs/Markdown/Interfaces/IParser.cs b/cs/Markdown/Interfaces/IParser.cs index e6f73bc06..5cf7a0621 100644 --- a/cs/Markdown/Interfaces/IParser.cs +++ b/cs/Markdown/Interfaces/IParser.cs @@ -1,6 +1,8 @@ +using Markdown.AstNodes; + namespace Markdown; public interface IParser { - string Parse(IEnumerable tokens); + RootMarkdownNode Parse(List tokens); } \ No newline at end of file diff --git a/cs/Markdown/Interfaces/ISingleMarkdownTokenType.cs b/cs/Markdown/Interfaces/ISingleMarkdownTokenType.cs deleted file mode 100644 index 2e1e6a4cb..000000000 --- a/cs/Markdown/Interfaces/ISingleMarkdownTokenType.cs +++ /dev/null @@ -1,6 +0,0 @@ -namespace Markdown; - -public interface ISingleMarkdownTokenType : IMarkdownTokenType -{ - string OpenTag { get; } -} \ No newline at end of file diff --git a/cs/Markdown/Interfaces/IToken.cs b/cs/Markdown/Interfaces/IToken.cs index 0c11513ac..8e60c7590 100644 --- a/cs/Markdown/Interfaces/IToken.cs +++ b/cs/Markdown/Interfaces/IToken.cs @@ -1,7 +1,10 @@ +using Markdown.Enums; + namespace Markdown; public interface IToken { + MarkdownTokenName Name { get; } int Position { get; } int Length { get; } string Value { get; } diff --git a/cs/Markdown/MarkdownLexer.cs b/cs/Markdown/MarkdownLexer.cs index ac84f4085..c81ac69cd 100644 --- a/cs/Markdown/MarkdownLexer.cs +++ b/cs/Markdown/MarkdownLexer.cs @@ -1,35 +1,158 @@ -using Markdown.TokenTypes; +using System.Text; +using Markdown.Tokens; namespace Markdown; public class MarkdownLexer : ILexer { - public int Position { get; private set; } - private List Tokens { get; } = []; private int position; - - public IEnumerable Tokenize(string input) + private readonly List tokens = []; + + public List Tokenize(string input) { - throw new NotImplementedException(); + position = 0; + var end = input.Length; + var stack = new Stack<(string tag, int position)>(); + + while (InBoundary(position)) + { + switch (input[position]) + { + case ' ': + tokens.Add(new SpaceToken(position++)); + break; + case '\n': + ParseNewLineAndAdvance(stack); + break; + case '\\': + ParseEscapeAndAdvance(input); + break; + case '_': + ParseItalicOrBoldAndAdvance(input, stack); + break; + case '#': + ParseHeadingAndAdvance(input, stack); + break; + default: + ParseTextAndAdvance(input); + break; + } + } + + return tokens; + + bool InBoundary(int i) => i < end; } - private BoldMarkdownToken? TryParseBold(string input) + private bool IsStartOfParagraph(string input) => position == 0 || position > 0 && input[position - 1] == '\n'; + + private void ParseHeadingAndAdvance(string input, Stack<(string tag, int position)> stack) { - throw new NotImplementedException(); + if (position + 1 < input.Length && input[position + 1] == ' ' && IsStartOfParagraph(input)) + { + tokens.Add(new HeadingToken(position)); + position += 2; + stack.Push(("# ", position)); + } + else + { + tokens.Add(new TextToken(position, "#")); + position++; + } } - - private ItalicMarkdownToken? TryParseItalic(string input) + + private void ParseTextAndAdvance(string input) { - throw new NotImplementedException(); + var sb = new StringBuilder(); + var start = position; + var endChars = new[] { '#', '_', '\n', '\\', ' ' }; + while (position < input.Length && !endChars.Contains(input[position]) && !char.IsDigit(input[position])) + sb.Append(input[position++]); + + if (sb.Length > 0) tokens.Add(new TextToken(start, sb.ToString())); + if (position < input.Length && char.IsDigit(input[position])) ParseNumberAndAdvance(input); } - - private HeadingMarkdownToken? TryParseHeading(string input) + + private void ParseNumberAndAdvance(string input) { - throw new NotImplementedException(); + var sb = new StringBuilder(); + var start = position; + while (position < input.Length && (char.IsDigit(input[position]) || input[position] == '_')) + sb.Append(input[position++]); + tokens.Add(new NumberToken(start, sb.ToString())); } - - private TextMarkdownToken? TryParseText(string input) + + private void ParseItalicOrBoldAndAdvance(string input, Stack<(string tag, int position)> stack) { - throw new NotImplementedException(); + var canBeBold = position + 1 < input.Length && input[position + 1] == '_'; + if (stack.Count > 0 && stack.Peek().tag == "__" && canBeBold) ParseBoldAndAdvance(stack); + else if (stack.Count > 0 && stack.Peek().tag == "_") ParseItalicAndAdvance(stack); + else if (canBeBold) ParseBoldAndAdvance(stack); + else ParseItalicAndAdvance(stack); + } + + private void ParseBoldAndAdvance(Stack<(string tag, int position)> stack) + { + if (stack.Count == 0 || stack.Count > 0 && (stack.Peek().tag == "# " || stack.Peek().tag == "_")) + stack.Push(("__", position)); + else if (stack.Count > 0 && stack.Peek().tag == "__") + stack.Pop(); + else throw new Exception("Не рассмотрел какой-то случай в жирном"); + + tokens.Add(new BoldToken(position)); + position += 2; + } + + private void ParseItalicAndAdvance(Stack<(string tag, int position)> stack) + { + if (stack.Count == 0 || stack.Count > 0 && (stack.Peek().tag == "__" || stack.Peek().tag == "# ")) + stack.Push(("_", position)); + else if (stack.Count > 0 && stack.Peek().tag == "_") + stack.Pop(); + else throw new Exception("Не рассмотрел какой-то случай в курсиве"); + tokens.Add(new ItalicToken(position)); + position++; + } + + private void ParseNewLineAndAdvance(Stack<(string tag, int position)> stack) + { + tokens.Add(new NewLineToken(position)); + stack.Clear(); + position++; + } + + private void ParseEscapeAndAdvance(string input) + { + if (position + 1 >= input.Length) + { + tokens.Add(new TextToken(position, "\\")); + return; + } + + if (input[position + 1] == '#') + { + tokens.Add(new TextToken(position, "#")); + position += 2; + } + else if (position + 2 < input.Length && input[position + 1] == '_' && input[position + 2] == '_') + { + tokens.Add(new TextToken(position, "__")); + position += 3; + } + else if (input[position + 1] == '_') + { + tokens.Add(new TextToken(position, "_")); + position += 2; + } + else if (input[position + 1] == '\\') + { + tokens.Add(new TextToken(position, "\\")); + position += 2; + } + else + { + tokens.Add(new TextToken(position, input[position].ToString() + input[position + 1])); + position += 2; + } } } \ No newline at end of file diff --git a/cs/Markdown/MarkdownToHtmlConverter.cs b/cs/Markdown/MarkdownToHtmlConverter.cs index 6130bb8ab..c4adb2186 100644 --- a/cs/Markdown/MarkdownToHtmlConverter.cs +++ b/cs/Markdown/MarkdownToHtmlConverter.cs @@ -1,3 +1,5 @@ +using Markdown.AstNodes; + namespace Markdown; public class MarkdownToHtmlConverter(ILexer lexer, IParser parser) @@ -8,6 +10,13 @@ public class MarkdownToHtmlConverter(ILexer lexer, IParser parser) public string Convert(string input) { var tokens = Lexer.Tokenize(input); - return Parser.Parse(tokens); + var enumerable = tokens.ToList(); + var ast = Parser.Parse(enumerable); + return ConvertAstToHtml(ast); + } + + private string ConvertAstToHtml(MarkdownNode ast) + { + throw new NotImplementedException(); } } \ No newline at end of file diff --git a/cs/Markdown/MarkdownToken.cs b/cs/Markdown/MarkdownToken.cs deleted file mode 100644 index c61ea8a0d..000000000 --- a/cs/Markdown/MarkdownToken.cs +++ /dev/null @@ -1,13 +0,0 @@ -using Markdown.Enums; - -namespace Markdown; - -public abstract class MarkdownToken(int position, string value) : IToken -{ - public int Position { get; } = position; - public string Value { get; } = value; - public int Length => Value.Length; - public virtual MarkdownTokenName Name { get; } - - public int GetIndexToNextToken() => Position + Length; -} \ No newline at end of file diff --git a/cs/Markdown/TokenTypes/BoldMarkdownToken.cs b/cs/Markdown/TokenTypes/BoldMarkdownToken.cs deleted file mode 100644 index 5e2e1b3c8..000000000 --- a/cs/Markdown/TokenTypes/BoldMarkdownToken.cs +++ /dev/null @@ -1,11 +0,0 @@ -using Markdown.Enums; - -namespace Markdown.TokenTypes; - -public class BoldMarkdownToken(int position, string value) : MarkdownToken(position, value), IPairedMarkdownTokenType -{ - public override MarkdownTokenName Name => MarkdownTokenName.Bold; - public string OpenTag => "__"; - public string CloseTag => "__"; - public bool CanBeWithoutCloseTag => false; -} \ No newline at end of file diff --git a/cs/Markdown/TokenTypes/HeadingMarkdownToken.cs b/cs/Markdown/TokenTypes/HeadingMarkdownToken.cs deleted file mode 100644 index 5717bfa47..000000000 --- a/cs/Markdown/TokenTypes/HeadingMarkdownToken.cs +++ /dev/null @@ -1,9 +0,0 @@ -using Markdown.Enums; - -namespace Markdown.TokenTypes; - -public class HeadingMarkdownToken(int position, string value) : MarkdownToken(position, value), ISingleMarkdownTokenType -{ - public override MarkdownTokenName Name => MarkdownTokenName.Heading; - public string OpenTag => "# "; -} \ No newline at end of file diff --git a/cs/Markdown/TokenTypes/ItalicMarkdownToken.cs b/cs/Markdown/TokenTypes/ItalicMarkdownToken.cs deleted file mode 100644 index 705c1e189..000000000 --- a/cs/Markdown/TokenTypes/ItalicMarkdownToken.cs +++ /dev/null @@ -1,11 +0,0 @@ -using Markdown.Enums; - -namespace Markdown.TokenTypes; - -public class ItalicMarkdownToken(int position, string value) : MarkdownToken(position, value), IPairedMarkdownTokenType -{ - public override MarkdownTokenName Name => MarkdownTokenName.Italic; - public string OpenTag => "_"; - public string CloseTag => "_"; - public bool CanBeWithoutCloseTag => false; -} \ No newline at end of file diff --git a/cs/Markdown/TokenTypes/TextMarkdownToken.cs b/cs/Markdown/TokenTypes/TextMarkdownToken.cs deleted file mode 100644 index e073870df..000000000 --- a/cs/Markdown/TokenTypes/TextMarkdownToken.cs +++ /dev/null @@ -1,8 +0,0 @@ -using Markdown.Enums; - -namespace Markdown.TokenTypes; - -public class TextMarkdownToken(int position, string value) : MarkdownToken(position, value), IMarkdownTokenType -{ - public override MarkdownTokenName Name => MarkdownTokenName.Text; -} \ No newline at end of file diff --git a/cs/Markdown/Tokens/BoldToken.cs b/cs/Markdown/Tokens/BoldToken.cs new file mode 100644 index 000000000..58c6b2bdb --- /dev/null +++ b/cs/Markdown/Tokens/BoldToken.cs @@ -0,0 +1,9 @@ +using Markdown.Enums; + +namespace Markdown.Tokens; + +public class BoldToken(int position) : Token(position) +{ + public override MarkdownTokenName Name => MarkdownTokenName.Bold; + public override string Value => "__"; +} \ No newline at end of file diff --git a/cs/Markdown/Tokens/HeadingToken.cs b/cs/Markdown/Tokens/HeadingToken.cs new file mode 100644 index 000000000..3b4c7ee70 --- /dev/null +++ b/cs/Markdown/Tokens/HeadingToken.cs @@ -0,0 +1,9 @@ +using Markdown.Enums; + +namespace Markdown.Tokens; + +public class HeadingToken(int position) : Token(position) +{ + public override MarkdownTokenName Name => MarkdownTokenName.Heading; + public override string Value => "# "; +} \ No newline at end of file diff --git a/cs/Markdown/Tokens/ItalicToken.cs b/cs/Markdown/Tokens/ItalicToken.cs new file mode 100644 index 000000000..c48dbadcf --- /dev/null +++ b/cs/Markdown/Tokens/ItalicToken.cs @@ -0,0 +1,9 @@ +using Markdown.Enums; + +namespace Markdown.Tokens; + +public class ItalicToken(int position) : Token(position) +{ + public override MarkdownTokenName Name => MarkdownTokenName.Italic; + public override string Value => "_"; +} \ No newline at end of file diff --git a/cs/Markdown/Tokens/NewLineToken.cs b/cs/Markdown/Tokens/NewLineToken.cs new file mode 100644 index 000000000..934e882b7 --- /dev/null +++ b/cs/Markdown/Tokens/NewLineToken.cs @@ -0,0 +1,9 @@ +using Markdown.Enums; + +namespace Markdown.Tokens; + +public class NewLineToken(int position) : Token(position) +{ + public override MarkdownTokenName Name => MarkdownTokenName.NewLine; + public override string Value => "\n"; +} \ No newline at end of file diff --git a/cs/Markdown/Tokens/NumberToken.cs b/cs/Markdown/Tokens/NumberToken.cs new file mode 100644 index 000000000..72cbcb0b9 --- /dev/null +++ b/cs/Markdown/Tokens/NumberToken.cs @@ -0,0 +1,9 @@ +using Markdown.Enums; + +namespace Markdown.Tokens; + +public class NumberToken(int position, string value) : Token(position) +{ + public override MarkdownTokenName Name => MarkdownTokenName.Number; + public override string Value => value; +} \ No newline at end of file diff --git a/cs/Markdown/Tokens/SpaceToken.cs b/cs/Markdown/Tokens/SpaceToken.cs new file mode 100644 index 000000000..371179f32 --- /dev/null +++ b/cs/Markdown/Tokens/SpaceToken.cs @@ -0,0 +1,9 @@ +using Markdown.Enums; + +namespace Markdown.Tokens; + +public class SpaceToken(int position) : Token(position) +{ + public override MarkdownTokenName Name => MarkdownTokenName.Space; + public override string Value => " "; +} \ No newline at end of file diff --git a/cs/Markdown/Tokens/TextToken.cs b/cs/Markdown/Tokens/TextToken.cs new file mode 100644 index 000000000..2e3f88b97 --- /dev/null +++ b/cs/Markdown/Tokens/TextToken.cs @@ -0,0 +1,9 @@ +using Markdown.Enums; + +namespace Markdown.Tokens; + +public class TextToken(int position, string value) : Token(position) +{ + public override MarkdownTokenName Name => MarkdownTokenName.Text; + public override string Value => value; +} \ No newline at end of file diff --git a/cs/Markdown/Tokens/Token.cs b/cs/Markdown/Tokens/Token.cs new file mode 100644 index 000000000..eecbc3ec7 --- /dev/null +++ b/cs/Markdown/Tokens/Token.cs @@ -0,0 +1,18 @@ +using Markdown.Enums; + +namespace Markdown.Tokens; + +public abstract class Token(int position) : IToken +{ + public abstract MarkdownTokenName Name { get; } + public abstract string Value { get; } + public int Position => position; + public int Length => Value.Length; + public int GetIndexToNextToken() => Position + Length; + + public override bool Equals(object? obj) => obj is Token token && Equals(token); + + public override int GetHashCode() => HashCode.Combine((int)Name, Value); + + private bool Equals(Token token) => Name == token.Name && Position == token.Position && Value == token.Value; +} \ No newline at end of file diff --git a/cs/MarkdownTests/LexerTests.cs b/cs/MarkdownTests/LexerTests.cs new file mode 100644 index 000000000..6199bae31 --- /dev/null +++ b/cs/MarkdownTests/LexerTests.cs @@ -0,0 +1,99 @@ +using FluentAssertions; +using Markdown; +using Markdown.Tokens; +using NUnit.Framework; + +namespace MarkdownTests; + +public class LexerTests +{ + private MarkdownLexer lexer; + + [SetUp] + public void Setup() => lexer = new MarkdownLexer(); + + [Test] + public void Tokenize_WorksCorrect_WhenItalic() + { + const string text = "_italic_"; + var expected = new IToken[] { new ItalicToken(0), new TextToken(1, "italic"), new ItalicToken(7) }; + var actual = lexer.Tokenize(text); + actual.Should().BeEquivalentTo(expected, o => o.WithStrictOrdering()); + } + + [Test] + public void Tokenize_WorksCorrect_WhenBold() + { + const string text = "__bold__"; + var expected = new IToken[] { new BoldToken(0), new TextToken(2, "bold"), new BoldToken(6) }; + var actual = lexer.Tokenize(text); + actual.Should().BeEquivalentTo(expected, o => o.WithStrictOrdering()); + } + + [Test] + public void Tokenize_WorksCorrect_WhenHeadingWithoutCloseTag() + { + const string text = "# heading"; + var expected = new IToken[] { new HeadingToken(0), new TextToken(2, "heading") }; + var actual = lexer.Tokenize(text); + actual.Should().BeEquivalentTo(expected, o => o.WithStrictOrdering()); + } + + [Test] + public void Tokenize_WorksCorrect_WhenHeadingWithCloseTag() + { + const string text = "# heading\ntext"; + var expected = new IToken[] + { + new HeadingToken(0), new TextToken(2, "heading"), new NewLineToken(9), new TextToken(10, "text") + }; + var actual = lexer.Tokenize(text); + actual.Should().BeEquivalentTo(expected, o => o.WithStrictOrdering()); + } + + [Test] + public void Tokenize_WorksCorrect_WithItalicInBold() + { + const string text = "__bold _italic___"; + var expected = new IToken[] + { + new BoldToken(0), new TextToken(2, "bold"), new SpaceToken(6), new ItalicToken(7), + new TextToken(8, "italic"), new ItalicToken(14), new BoldToken(15) + }; + var actual = lexer.Tokenize(text); + actual.Should().BeEquivalentTo(expected, o => o.WithStrictOrdering()); + } + + [Test] + public void Tokenize_WorksCorrect_WithAllTags() + { + const string text = "# a b\\_c _d_ __e__\n___f___ 1_234"; + var expected = new IToken[] + { + new HeadingToken(0), + new TextToken(2, "a"), + new SpaceToken(3), + new TextToken(4, "b"), + new TextToken(5, @"_"), + new TextToken(7, @"c"), + new SpaceToken(8), + new ItalicToken(9), + new TextToken(10, "d"), + new ItalicToken(11), + new SpaceToken(12), + new BoldToken(13), + new TextToken(15, "e"), + new BoldToken(16), + new NewLineToken(18), + new BoldToken(19), + new ItalicToken(21), + new TextToken(22, "f"), + new ItalicToken(23), + new BoldToken(24), + new SpaceToken(26), + new NumberToken(27, "1_234") + }; + var actual = lexer.Tokenize(text); + actual.Should().BeEquivalentTo(expected, o => o.WithStrictOrdering()); + } +} \ No newline at end of file diff --git a/cs/MarkdownTests/MarkdownTests.csproj b/cs/MarkdownTests/MarkdownTests.csproj index e3d362c0e..e26fc91ad 100644 --- a/cs/MarkdownTests/MarkdownTests.csproj +++ b/cs/MarkdownTests/MarkdownTests.csproj @@ -7,4 +7,14 @@ Exe + + + + + + + + + + diff --git a/cs/MarkdownTests/Program.cs b/cs/MarkdownTests/Program.cs deleted file mode 100644 index 54d354847..000000000 --- a/cs/MarkdownTests/Program.cs +++ /dev/null @@ -1,9 +0,0 @@ -namespace MarkdownTests; - -class Program -{ - static void Main(string[] args) - { - Console.WriteLine("Hello, World!"); - } -} \ No newline at end of file From f01e8e552c4235087640bbd7dab81d2215c7aaf7 Mon Sep 17 00:00:00 2001 From: Maxim Mineev Date: Mon, 2 Dec 2024 23:50:56 +0500 Subject: [PATCH 4/7] =?UTF-8?q?Lexer,=20Parser,=20Converter=20+=20=D0=A2?= =?UTF-8?q?=D0=B5=D1=81=D1=82=D1=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cs/Markdown/AstNodes/BoldMarkdownNode.cs | 3 +- cs/Markdown/AstNodes/HeadingMarkdownNode.cs | 2 +- cs/Markdown/AstNodes/ItalicMarkdownNode.cs | 2 +- cs/Markdown/AstNodes/MarkdownNode.cs | 12 +- cs/Markdown/AstNodes/RootMarkdownNode.cs | 2 +- cs/Markdown/AstNodes/TextMarkdownNode.cs | 3 +- cs/Markdown/Interfaces/IToken.cs | 1 + cs/Markdown/MarkdownLexer.cs | 212 +++++++++++++------- cs/Markdown/MarkdownParser.cs | 209 +++++++++++++++++++ cs/Markdown/MarkdownToHtmlConverter.cs | 44 +++- cs/Markdown/Tokens/Token.cs | 1 + cs/MarkdownTests/MarkdownConverterTests.cs | 103 ++++++++++ cs/MarkdownTests/MarkdownTests.csproj | 13 ++ cs/MarkdownTests/ParserTests.cs | 164 +++++++++++++++ cs/MarkdownTests/TestsData/expected.txt | 73 +++++++ cs/MarkdownTests/TestsData/test.txt | 73 +++++++ 16 files changed, 828 insertions(+), 89 deletions(-) create mode 100644 cs/Markdown/MarkdownParser.cs create mode 100644 cs/MarkdownTests/MarkdownConverterTests.cs create mode 100644 cs/MarkdownTests/ParserTests.cs create mode 100644 cs/MarkdownTests/TestsData/expected.txt create mode 100644 cs/MarkdownTests/TestsData/test.txt diff --git a/cs/Markdown/AstNodes/BoldMarkdownNode.cs b/cs/Markdown/AstNodes/BoldMarkdownNode.cs index b73ca44e3..aaf2b37ba 100644 --- a/cs/Markdown/AstNodes/BoldMarkdownNode.cs +++ b/cs/Markdown/AstNodes/BoldMarkdownNode.cs @@ -1,9 +1,8 @@ -using System.Text; using Markdown.Enums; namespace Markdown.AstNodes; -public class BoldMarkdownNode(string content) : MarkdownNode(content), IMarkdownNodeWithChildren +public class BoldMarkdownNode : MarkdownNode, IMarkdownNodeWithChildren { public override MarkdownNodeName Type => MarkdownNodeName.Bold; public List Children { get; } = []; diff --git a/cs/Markdown/AstNodes/HeadingMarkdownNode.cs b/cs/Markdown/AstNodes/HeadingMarkdownNode.cs index 028ca540c..994a2e04f 100644 --- a/cs/Markdown/AstNodes/HeadingMarkdownNode.cs +++ b/cs/Markdown/AstNodes/HeadingMarkdownNode.cs @@ -2,7 +2,7 @@ namespace Markdown.AstNodes; -public class HeadingMarkdownNode(string content) : MarkdownNode(content), IMarkdownNodeWithChildren +public class HeadingMarkdownNode : MarkdownNode, IMarkdownNodeWithChildren { public override MarkdownNodeName Type => MarkdownNodeName.Heading; public List Children { get; } = []; diff --git a/cs/Markdown/AstNodes/ItalicMarkdownNode.cs b/cs/Markdown/AstNodes/ItalicMarkdownNode.cs index deb6046f9..692df2912 100644 --- a/cs/Markdown/AstNodes/ItalicMarkdownNode.cs +++ b/cs/Markdown/AstNodes/ItalicMarkdownNode.cs @@ -2,7 +2,7 @@ namespace Markdown.AstNodes; -public class ItalicMarkdownNode(string content) : MarkdownNode(content), IMarkdownNodeWithChildren +public class ItalicMarkdownNode : MarkdownNode, IMarkdownNodeWithChildren { public override MarkdownNodeName Type => MarkdownNodeName.Italic; public List Children { get; } = []; diff --git a/cs/Markdown/AstNodes/MarkdownNode.cs b/cs/Markdown/AstNodes/MarkdownNode.cs index 8d1976fea..18e17e0ba 100644 --- a/cs/Markdown/AstNodes/MarkdownNode.cs +++ b/cs/Markdown/AstNodes/MarkdownNode.cs @@ -2,8 +2,16 @@ namespace Markdown.AstNodes; -public abstract class MarkdownNode(string content) +public abstract class MarkdownNode { public abstract MarkdownNodeName Type { get; } - public string Content { get; } = content; + + public override bool Equals(object? obj) + { + if (this is IMarkdownNodeWithChildren node && obj is IMarkdownNodeWithChildren other) + return this.GetType() == other.GetType() && node.Children.SequenceEqual(other.Children); + if (this is TextMarkdownNode valueNode && obj is TextMarkdownNode otherValueNode) + return valueNode.Content.Equals(otherValueNode.Content); + return false; + } } \ No newline at end of file diff --git a/cs/Markdown/AstNodes/RootMarkdownNode.cs b/cs/Markdown/AstNodes/RootMarkdownNode.cs index f463a33d8..96e5d6c5b 100644 --- a/cs/Markdown/AstNodes/RootMarkdownNode.cs +++ b/cs/Markdown/AstNodes/RootMarkdownNode.cs @@ -2,7 +2,7 @@ namespace Markdown.AstNodes; -public class RootMarkdownNode(string content) : MarkdownNode(content), IMarkdownNodeWithChildren +public class RootMarkdownNode : MarkdownNode, IMarkdownNodeWithChildren { public override MarkdownNodeName Type => MarkdownNodeName.Root; public List Children { get; } = []; diff --git a/cs/Markdown/AstNodes/TextMarkdownNode.cs b/cs/Markdown/AstNodes/TextMarkdownNode.cs index 453eaa665..039862a9c 100644 --- a/cs/Markdown/AstNodes/TextMarkdownNode.cs +++ b/cs/Markdown/AstNodes/TextMarkdownNode.cs @@ -2,7 +2,8 @@ namespace Markdown.AstNodes; -public class TextMarkdownNode(string content) : MarkdownNode(content) +public class TextMarkdownNode(string content) : MarkdownNode { public override MarkdownNodeName Type => MarkdownNodeName.Text; + public string Content => content; } \ No newline at end of file diff --git a/cs/Markdown/Interfaces/IToken.cs b/cs/Markdown/Interfaces/IToken.cs index 8e60c7590..9404c8c60 100644 --- a/cs/Markdown/Interfaces/IToken.cs +++ b/cs/Markdown/Interfaces/IToken.cs @@ -9,4 +9,5 @@ public interface IToken int Length { get; } string Value { get; } int GetIndexToNextToken(); + bool Is(MarkdownTokenName type); } \ No newline at end of file diff --git a/cs/Markdown/MarkdownLexer.cs b/cs/Markdown/MarkdownLexer.cs index c81ac69cd..6886885a0 100644 --- a/cs/Markdown/MarkdownLexer.cs +++ b/cs/Markdown/MarkdownLexer.cs @@ -7,31 +7,39 @@ public class MarkdownLexer : ILexer { private int position; private readonly List tokens = []; + private const string DoubleGround = "__"; + private const string Ground = "_"; + private const string Escape = "\\"; + private const char GroundChar = '_'; + private const char SharpChar = '#'; + private const char EscapeChar = '\\'; + private const char NewLineChar = '\n'; + private const char SpaceChar = ' '; + private readonly char[] escapedChars = [SharpChar, GroundChar, EscapeChar, NewLineChar]; public List Tokenize(string input) { position = 0; - var end = input.Length; - var stack = new Stack<(string tag, int position)>(); + var nestingStack = new Stack(); - while (InBoundary(position)) + while (position < input.Length) { switch (input[position]) { - case ' ': - tokens.Add(new SpaceToken(position++)); + case SpaceChar: + ParseSpaceAndAdvance(); break; - case '\n': - ParseNewLineAndAdvance(stack); + case NewLineChar: + ParseNewLineAndAdvance(nestingStack); break; - case '\\': + case EscapeChar: ParseEscapeAndAdvance(input); break; - case '_': - ParseItalicOrBoldAndAdvance(input, stack); + case GroundChar: + ParseItalicOrBoldAndAdvance(input, nestingStack); break; - case '#': - ParseHeadingAndAdvance(input, stack); + case SharpChar: + ParseHeadingAndAdvance(input); break; default: ParseTextAndAdvance(input); @@ -40,82 +48,135 @@ public List Tokenize(string input) } return tokens; - - bool InBoundary(int i) => i < end; } - private bool IsStartOfParagraph(string input) => position == 0 || position > 0 && input[position - 1] == '\n'; + private void ParseSpaceAndAdvance() => tokens.Add(new SpaceToken(position++)); - private void ParseHeadingAndAdvance(string input, Stack<(string tag, int position)> stack) + private void ParseHeadingAndAdvance(string input) { - if (position + 1 < input.Length && input[position + 1] == ' ' && IsStartOfParagraph(input)) - { - tokens.Add(new HeadingToken(position)); - position += 2; - stack.Push(("# ", position)); - } - else - { - tokens.Add(new TextToken(position, "#")); - position++; - } + if (NextIsSpace(input) && IsStartOfParagraph(input)) tokens.Add(new HeadingToken(position++)); + else tokens.Add(new TextToken(position, "#")); + position++; } private void ParseTextAndAdvance(string input) { - var sb = new StringBuilder(); + var value = new StringBuilder(); var start = position; - var endChars = new[] { '#', '_', '\n', '\\', ' ' }; - while (position < input.Length && !endChars.Contains(input[position]) && !char.IsDigit(input[position])) - sb.Append(input[position++]); + var endChars = new[] { SharpChar, GroundChar, NewLineChar, EscapeChar, SpaceChar }; + while (position < input.Length && !endChars.Contains(input[position]) && !CurrentIsDigit(input)) + value.Append(input[position++]); - if (sb.Length > 0) tokens.Add(new TextToken(start, sb.ToString())); - if (position < input.Length && char.IsDigit(input[position])) ParseNumberAndAdvance(input); + if (value.Length > 0) tokens.Add(new TextToken(start, value.ToString())); + if (position < input.Length && CurrentIsDigit(input)) ParseNumberAndAdvance(input); } + private void ParseNumberAndAdvance(string input) { var sb = new StringBuilder(); var start = position; - while (position < input.Length && (char.IsDigit(input[position]) || input[position] == '_')) + while (position < input.Length && (CurrentIsDigit(input) || input[position] == GroundChar)) sb.Append(input[position++]); tokens.Add(new NumberToken(start, sb.ToString())); } - private void ParseItalicOrBoldAndAdvance(string input, Stack<(string tag, int position)> stack) + private void ParseItalicOrBoldAndAdvance(string input, Stack stack) + { + var isDoubleGround = NextIsGround(input); + var isTripleGround = NextIsDoubleGround(input); + var isSingleGround = !isTripleGround && !isDoubleGround; + if (stack.Count == 0) ParseItalicOrBoldAndAdvanceWhenStackEmpty(isSingleGround, isTripleGround, stack); + else if (stack.Count == 1) + ParseItalicOrBoldAndAdvanceWhenStackHasOne(isSingleGround, isDoubleGround, isTripleGround, stack); + else if (stack.Count == 2) ParseItalicOrBoldAndAdvanceWhenStackHasTwo(isSingleGround, isTripleGround, stack); + } + + private void ParseItalicOrBoldAndAdvanceWhenStackEmpty(bool isSingleGround, bool isTripleGround, + Stack stack) { - var canBeBold = position + 1 < input.Length && input[position + 1] == '_'; - if (stack.Count > 0 && stack.Peek().tag == "__" && canBeBold) ParseBoldAndAdvance(stack); - else if (stack.Count > 0 && stack.Peek().tag == "_") ParseItalicAndAdvance(stack); - else if (canBeBold) ParseBoldAndAdvance(stack); - else ParseItalicAndAdvance(stack); + if (isSingleGround) + { + ParseItalicAndAdvance(); + stack.Push(Ground); + return; + } + + ParseBoldAndAdvance(); + stack.Push(DoubleGround); + if (!isTripleGround) return; + ParseItalicAndAdvance(); + stack.Push(Ground); + } + + private void ParseItalicOrBoldAndAdvanceWhenStackHasOne(bool isSingleGround, bool isDoubleGround, + bool isTripleGround, + Stack stack) + { + switch (stack.Peek()) + { + case DoubleGround when isSingleGround: + ParseItalicAndAdvance(); + stack.Push(Ground); + break; + case DoubleGround: + { + if (isTripleGround) ParseItalicAndAdvance(); + ParseBoldAndAdvance(); + stack.Pop(); + break; + } + case Ground: + { + if (isTripleGround) + { + ParseBoldAndAdvance(); + ParseItalicAndAdvance(); + } + else if (isDoubleGround) + { + tokens.Add(new TextToken(position, DoubleGround)); + position += 2; + } + else ParseItalicAndAdvance(); + + stack.Pop(); + break; + } + } } - private void ParseBoldAndAdvance(Stack<(string tag, int position)> stack) + private void ParseItalicOrBoldAndAdvanceWhenStackHasTwo(bool isSingleGround, bool isTripleGround, + Stack stack) { - if (stack.Count == 0 || stack.Count > 0 && (stack.Peek().tag == "# " || stack.Peek().tag == "_")) - stack.Push(("__", position)); - else if (stack.Count > 0 && stack.Peek().tag == "__") + if (isSingleGround) + { + ParseItalicAndAdvance(); stack.Pop(); - else throw new Exception("Не рассмотрел какой-то случай в жирном"); - + return; + } + + if (isTripleGround) ParseItalicAndAdvance(); + ParseBoldAndAdvance(); + + stack.Pop(); + stack.Pop(); + } + + private void ParseBoldAndAdvance() + { tokens.Add(new BoldToken(position)); position += 2; } - private void ParseItalicAndAdvance(Stack<(string tag, int position)> stack) + private void ParseItalicAndAdvance() { - if (stack.Count == 0 || stack.Count > 0 && (stack.Peek().tag == "__" || stack.Peek().tag == "# ")) - stack.Push(("_", position)); - else if (stack.Count > 0 && stack.Peek().tag == "_") - stack.Pop(); - else throw new Exception("Не рассмотрел какой-то случай в курсиве"); tokens.Add(new ItalicToken(position)); position++; } - private void ParseNewLineAndAdvance(Stack<(string tag, int position)> stack) - { + private void ParseNewLineAndAdvance(Stack stack) + { tokens.Add(new NewLineToken(position)); stack.Clear(); position++; @@ -125,34 +186,31 @@ private void ParseEscapeAndAdvance(string input) { if (position + 1 >= input.Length) { - tokens.Add(new TextToken(position, "\\")); + tokens.Add(new TextToken(position++, Escape)); return; } - if (input[position + 1] == '#') - { - tokens.Add(new TextToken(position, "#")); - position += 2; - } - else if (position + 2 < input.Length && input[position + 1] == '_' && input[position + 2] == '_') + if (NextIsDoubleGround(input)) { - tokens.Add(new TextToken(position, "__")); + tokens.Add(new TextToken(position, DoubleGround)); position += 3; + return; } - else if (input[position + 1] == '_') - { - tokens.Add(new TextToken(position, "_")); - position += 2; - } - else if (input[position + 1] == '\\') - { - tokens.Add(new TextToken(position, "\\")); - position += 2; - } - else - { - tokens.Add(new TextToken(position, input[position].ToString() + input[position + 1])); - position += 2; - } + + var next = input[position + 1]; + tokens.Add(escapedChars.Contains(next) + ? new TextToken(position, next.ToString()) + : new TextToken(position, Escape + next)); + position += 2; } + + private bool NextIsDoubleGround(string input) => + position + 2 < input.Length && input[position + 1] == GroundChar && input[position + 2] == GroundChar; + + private bool NextIsSpace(string input) => position + 1 < input.Length && input[position + 1] == SpaceChar; + private bool NextIsGround(string input) => position + 1 < input.Length && input[position + 1] == GroundChar; + private bool CurrentIsDigit(string input) => char.IsDigit(input[position]); + + private bool IsStartOfParagraph(string input) => + position == 0 || position > 0 && input[position - 1] == NewLineChar; } \ No newline at end of file diff --git a/cs/Markdown/MarkdownParser.cs b/cs/Markdown/MarkdownParser.cs new file mode 100644 index 000000000..28ad8275b --- /dev/null +++ b/cs/Markdown/MarkdownParser.cs @@ -0,0 +1,209 @@ +using Markdown.AstNodes; +using Markdown.Enums; +using Markdown.Tokens; + +namespace Markdown; + +public class MarkdownParser : IParser +{ + private const string DoubleGround = "__"; + private const string Ground = "_"; + private const MarkdownTokenName Text = MarkdownTokenName.Text; + private const MarkdownTokenName Bold = MarkdownTokenName.Bold; + private const MarkdownTokenName Italic = MarkdownTokenName.Italic; + private const MarkdownTokenName NewLine = MarkdownTokenName.NewLine; + private const MarkdownTokenName Space = MarkdownTokenName.Space; + + public RootMarkdownNode Parse(List tokens) + { + var root = new RootMarkdownNode(); + ParseChildren(tokens, root, 0, tokens.Count); + return root; + } + + private void ParseChildren(List tokens, IMarkdownNodeWithChildren parent, int left, int right) + { + if (left < 0 || right > tokens.Count) return; + if (left >= right) return; + var index = left; + while (index >= left && index < right) + { + var token = tokens[index]; + switch (token) + { + case TextToken: + case SpaceToken: + case NewLineToken: + case NumberToken: + parent.Children.Add(new TextMarkdownNode(token.Value)); + index++; + break; + case HeadingToken: + { + var heading = new HeadingMarkdownNode(); + var next = FindIndexOfCloseHeadingToken(tokens, index); + ParseChildren(tokens, heading, index + 1, next == -1 ? right : next); + parent.Children.Add(heading); + index = next == -1 ? right : next; + break; + } + case ItalicToken: + { + index = ParseItalicWithChildren(tokens, parent, index, right); + break; + } + case BoldToken: + { + index = ParseBoldWithChildren(tokens, parent, index); + break; + } + } + } + } + + private int ParseItalicWithChildren(List tokens, IMarkdownNodeWithChildren parent, int start, int right) + { + var italic = new ItalicMarkdownNode(); + var next = FindIndexOfCloseItalicToken(tokens, start); + + if (next == -1 || next >= right) + { + parent.Children.Add(new TextMarkdownNode(Ground)); + return start + 1; + } + + if (parent is ItalicMarkdownNode) + { + parent.Children.Add(new TextMarkdownNode(Ground)); + ParseChildren(tokens, parent, start + 1, next); + parent.Children.Add(new TextMarkdownNode(Ground)); + return next + 1; + } + + if (TokenInWord(tokens, start) && TokenInWord(tokens, next) && + ContainsToken(tokens, MarkdownTokenName.Space, start, next)) + { + parent.Children.Add(new TextMarkdownNode(Ground)); + for (var j = start + 1; j < next; j++) parent.Children.Add(new TextMarkdownNode(tokens[j].Value)); + parent.Children.Add(new TextMarkdownNode(Ground)); + return next + 1; + } + + ParseChildren(tokens, italic, start + 1, next); + if (italic.Children.Count == 0) + { + parent.Children.Add(new TextMarkdownNode(Ground + Ground)); + return start + 2; + } + + parent.Children.Add(italic); + return next + 1; + } + + private int ParseBoldWithChildren(List tokens, IMarkdownNodeWithChildren parent, int i) + { + var bold = new BoldMarkdownNode(); + var next = FindIndexOfCloseBoldToken(tokens, i); + if (next == -1 || parent is ItalicMarkdownNode) + { + parent.Children.Add(new TextMarkdownNode(DoubleGround)); + return i + 1; + } + + var indexOfIntersection = FindIndexOfIntersection(tokens, i + 1, next); + if (indexOfIntersection.start > 0) + { + parent.Children.Add(new TextMarkdownNode(DoubleGround)); + ParseChildren(tokens, parent, i + 1, indexOfIntersection.start); + parent.Children.Add(new TextMarkdownNode(Ground)); + ParseChildren(tokens, parent, indexOfIntersection.start + 1, next); + parent.Children.Add(new TextMarkdownNode(DoubleGround)); + ParseChildren(tokens, parent, next + 1, indexOfIntersection.end); + parent.Children.Add(new TextMarkdownNode(Ground)); + return indexOfIntersection.end + 1; + } + + ParseChildren(tokens, bold, i + 1, next); + if (bold.Children.Count == 0) + { + parent.Children.Add(new TextMarkdownNode(DoubleGround + DoubleGround)); + return i + 2; + } + + parent.Children.Add(bold); + return next + 1; + } + + private int FindIndexOfCloseItalicToken(List tokens, int start) + { + var index = start + 1; + if (index < tokens.Count && tokens[index].Is(Space)) return -1; + while (index < tokens.Count && tokens[index].Name != NewLine) + { + if (!tokens[index].Is(Italic)) + { + index++; + continue; + } + + if (index + 1 < tokens.Count && tokens[index + 1].Is(Italic)) + { + index += 2; + continue; + } + + if (index > 0 && !tokens[index - 1].Is(Space)) return index; + index++; + } + + return -1; + } + + private int FindIndexOfCloseBoldToken(List tokens, int start) + { + var index = start + 1; + if (index >= tokens.Count || tokens[index].Is(Space)) return -1; + while (index < tokens.Count && tokens[index].Name != NewLine) + { + if (index > 0 && tokens[index].Is(Bold) && !tokens[index - 1].Is(Space)) + return index; + index++; + } + + return -1; + } + + private int FindIndexOfCloseHeadingToken(List tokens, int start) + { + var index = start; + while (index < tokens.Count && !tokens[index].Is(NewLine)) + index++; + return index == tokens.Count ? -1 : index; + } + + private (int start, int end) FindIndexOfIntersection(List tokens, int left, int right) + { + for (var i = left; i < right; i++) + if (tokens[i] is ItalicToken) + { + var end = FindIndexOfCloseItalicToken(tokens, i); + if (end > right) return (i, end); + if (end == -1) continue; + i = end + 1; + } + + return (-1, -1); + } + + private bool TokenInWord(List tokens, int index) + => index > 0 && tokens[index - 1].Is(Text) && index + 1 < tokens.Count && + tokens[index + 1].Is(Text); + + private bool ContainsToken(List tokens, MarkdownTokenName expected, int left, int right) + { + for (var i = left; i < right; i++) + if (tokens[i].Is(expected)) + return true; + return false; + } +} \ No newline at end of file diff --git a/cs/Markdown/MarkdownToHtmlConverter.cs b/cs/Markdown/MarkdownToHtmlConverter.cs index c4adb2186..33a47147a 100644 --- a/cs/Markdown/MarkdownToHtmlConverter.cs +++ b/cs/Markdown/MarkdownToHtmlConverter.cs @@ -1,3 +1,4 @@ +using System.Text; using Markdown.AstNodes; namespace Markdown; @@ -10,13 +11,48 @@ public class MarkdownToHtmlConverter(ILexer lexer, IParser parser) public string Convert(string input) { var tokens = Lexer.Tokenize(input); - var enumerable = tokens.ToList(); - var ast = Parser.Parse(enumerable); + var ast = Parser.Parse(tokens); return ConvertAstToHtml(ast); } - private string ConvertAstToHtml(MarkdownNode ast) + private string ConvertAstToHtml(RootMarkdownNode ast) { - throw new NotImplementedException(); + var html = new StringBuilder(); + ConvertToHtml(ast, html); + return html.ToString(); + } + + private void ConvertToHtml(MarkdownNode node, StringBuilder html) + { + switch (node) + { + case TextMarkdownNode textNode: + html.Append(textNode.Content); + break; + case ItalicMarkdownNode italicNode: + html.Append(""); + foreach (var child in italicNode.Children) + ConvertToHtml(child, html); + html.Append(""); + break; + case BoldMarkdownNode boldNode: + html.Append(""); + foreach (var child in boldNode.Children) + ConvertToHtml(child, html); + html.Append(""); + break; + case HeadingMarkdownNode headingNode: + html.Append("

"); + foreach (var child in headingNode.Children) + ConvertToHtml(child, html); + html.Append("

"); + break; + case RootMarkdownNode root: + { + foreach (var child in root.Children) + ConvertToHtml(child, html); + break; + } + } } } \ No newline at end of file diff --git a/cs/Markdown/Tokens/Token.cs b/cs/Markdown/Tokens/Token.cs index eecbc3ec7..6057e9359 100644 --- a/cs/Markdown/Tokens/Token.cs +++ b/cs/Markdown/Tokens/Token.cs @@ -9,6 +9,7 @@ public abstract class Token(int position) : IToken public int Position => position; public int Length => Value.Length; public int GetIndexToNextToken() => Position + Length; + public bool Is(MarkdownTokenName type) => type == Name; public override bool Equals(object? obj) => obj is Token token && Equals(token); diff --git a/cs/MarkdownTests/MarkdownConverterTests.cs b/cs/MarkdownTests/MarkdownConverterTests.cs new file mode 100644 index 000000000..71e95ae8c --- /dev/null +++ b/cs/MarkdownTests/MarkdownConverterTests.cs @@ -0,0 +1,103 @@ +using System.Diagnostics; +using System.Text; +using FluentAssertions; +using Markdown; +using NUnit.Framework; + +namespace MarkdownTests; + +[TestFixture] +public class MarkdownConverterTests +{ + MarkdownToHtmlConverter converter; + + [SetUp] + public void Setup() + { + var lexer = new MarkdownLexer(); + var parser = new MarkdownParser(); + converter = new MarkdownToHtmlConverter(lexer, parser); + } + + [Test] + public void ConvertMarkdownToHtml() + { + var md = "# title"; + var expected = "

title

"; + var actual = converter.Convert(md); + actual.Should().Be(expected); + } + + [TestCase("# header", "

header

")] + [TestCase("_italic_", "italic")] + [TestCase("ita_lic_", "italic")] + [TestCase("__strong__", "strong")] + [TestCase("st__rong__", "strong")] + [TestCase("___text___", "text")] + [TestCase("__text _text_ text__", "text text text")] + [TestCase("# header\n new line", "

header

\n new line")] + [TestCase(@"\n\_Вот это\_", @"\n_Вот это_")] + [TestCase("line with _italic_ text", "line with italic text")] + [TestCase("a _t_ b", "a t b")] + [TestCase("line with __strong__ text", "line with strong text")] + [TestCase("line with __text _text_ text__ abc", "line with text text text abc")] + [TestCase("# Header 1\n ___Dear Diary___, today has been a _hard_ day", + "

Header 1

\n Dear Diary, today has been a hard day")] + [TestCase("# _Header_ 1\n ___Dear Diary___, today has been a _hard_ day", + "

Header 1

\n Dear Diary, today has been a hard day")] + public void MdRender_ReturnsExpectedHtml(string md, string expected) + { + var actual = converter.Convert(md); + actual.Should().Be(expected); + } + + [Test] + public void ConvertMarkdownToHtml_ConformsToSpecification() + { + var dir = Path.Combine(TestContext.CurrentContext.TestDirectory, "TestsData"); + var expectedPath = Path.Combine(dir, "expected.txt"); + var md = File.ReadAllText(Path.Combine(dir, "test.txt")); + + var expected = File.ReadAllText(expectedPath); + var actual = converter.Convert(md); + + actual.Should().Be(expected); + } + + [Test] + public void Convert_ShouldPerformInLinearTime() + { + const int smallInputSize = 1000; + const int largeInputSize = 100000; + + var smallInput = GenerateMarkdownInput(smallInputSize); + var largeInput = GenerateMarkdownInput(largeInputSize); + + var smallTime = MeasureExecutionTime(() => converter.Convert(smallInput)); + var largeTime = MeasureExecutionTime(() => converter.Convert(largeInput)); + + var growthFactor = (double)largeTime / smallTime; + growthFactor.Should().BeLessThan(largeInputSize / smallInputSize * 1.5, "execution time should grow linearly with the size of the input"); + } + + private string GenerateMarkdownInput(int size) + { + var mdBuilder = new StringBuilder(size); + for (var i = 0; i < size; i++) + { + mdBuilder.Append("# Heading\n"); + mdBuilder.Append("**Bold text**\n"); + mdBuilder.Append("*Italic text*\n"); + } + + return mdBuilder.ToString(); + } + + private long MeasureExecutionTime(Action action) + { + var stopwatch = Stopwatch.StartNew(); + action(); + stopwatch.Stop(); + return stopwatch.ElapsedMilliseconds; + } +} \ No newline at end of file diff --git a/cs/MarkdownTests/MarkdownTests.csproj b/cs/MarkdownTests/MarkdownTests.csproj index e26fc91ad..240a9d459 100644 --- a/cs/MarkdownTests/MarkdownTests.csproj +++ b/cs/MarkdownTests/MarkdownTests.csproj @@ -7,6 +7,14 @@ Exe + + bin\Debug/ + + + + bin\Release/ + + @@ -17,4 +25,9 @@ + + + PreserveNewest + + diff --git a/cs/MarkdownTests/ParserTests.cs b/cs/MarkdownTests/ParserTests.cs new file mode 100644 index 000000000..a69b41f56 --- /dev/null +++ b/cs/MarkdownTests/ParserTests.cs @@ -0,0 +1,164 @@ +using FluentAssertions; +using Markdown; +using Markdown.AstNodes; +using Markdown.Enums; +using Markdown.Tokens; +using NUnit.Framework; + +namespace MarkdownTests; + +public class ParserTests +{ + private MarkdownParser parser; + private MarkdownLexer lexer; + + [SetUp] + public void Setup() + { + parser = new MarkdownParser(); + lexer = new MarkdownLexer(); + } + + [Test] + [Description("Ручная проверка корректности AST для тегов Heading, Italic, Bold, Text")] + public void Parse_ReturnsAst_WithAllTags() + { + const string md = "# a b\\_c _d_ __e__\n___f___ 1_234"; + var tokens = lexer.Tokenize(md); + var actual = parser.Parse(tokens); + + var space = new TextMarkdownNode(" "); + var newLine = new TextMarkdownNode("\n"); + + var heading = new HeadingMarkdownNode(); + heading.Children.Add(new TextMarkdownNode("a")); + heading.Children.Add(space); + heading.Children.Add(new TextMarkdownNode("b")); + heading.Children.Add(new TextMarkdownNode("_")); + heading.Children.Add(new TextMarkdownNode("c")); + heading.Children.Add(space); + + var italicD = new ItalicMarkdownNode(); + italicD.Children.Add(new TextMarkdownNode("d")); + + heading.Children.Add(italicD); + heading.Children.Add(space); + + var boldE = new BoldMarkdownNode(); + boldE.Children.Add(new TextMarkdownNode("e")); + + heading.Children.Add(boldE); + + var boldF = new BoldMarkdownNode(); + var italicF = new ItalicMarkdownNode(); + italicF.Children.Add(new TextMarkdownNode("f")); + boldF.Children.Add(italicF); + + var text1_234 = new TextMarkdownNode("1_234"); + + var expected = new RootMarkdownNode(); + expected.Children.Add(heading); + expected.Children.Add(newLine); + expected.Children.Add(boldF); + expected.Children.Add(space); + expected.Children.Add(text1_234); + + actual.Should().BeEquivalentTo(expected); + } + + [Test] + [Description("Глубина дерева должна быть не больше 5: Root->Heading->Bold->Italic->Text")] + public void Parse_ReturnsAst_WithDepthLessThanFive() + { + var dir = Path.Combine(TestContext.CurrentContext.TestDirectory, "TestsData"); + var md = File.ReadAllText(Path.Combine(dir, "test.txt")); + var tokens = lexer.Tokenize(md); + var ast = parser.Parse(tokens); + var depth = GetAstDepth(ast); + depth.Should().BeLessThanOrEqualTo(5); + } + + [Test] + [Description("Вложенность должна быть корректной: Root->Heading->Bold->Italic->Text")] + public void Parse_ReturnsAst_WithCorrectNesting() + { + var dir = Path.Combine(TestContext.CurrentContext.TestDirectory, "TestsData"); + var md = File.ReadAllText(Path.Combine(dir, "test.txt")); + var tokens = lexer.Tokenize(md); + var ast = parser.Parse(tokens); + CheckNesting(ast); + } + + [Test] + public void Parse_Ast_NotHaveEmptyItalic() + { + var dir = Path.Combine(TestContext.CurrentContext.TestDirectory, "TestsData"); + var md = File.ReadAllText(Path.Combine(dir, "test.txt")); + var tokens = lexer.Tokenize(md); + var ast = parser.Parse(tokens); + AstNotHaveEmpty(ast); + } + + [Test] + public void Parse_Ast_NotHaveEmptyBold() + { + var dir = Path.Combine(TestContext.CurrentContext.TestDirectory, "TestsData"); + var md = File.ReadAllText(Path.Combine(dir, "test.txt")); + var tokens = lexer.Tokenize(md); + var ast = parser.Parse(tokens); + AstNotHaveEmpty(ast); + } + + private void AstNotHaveEmpty(MarkdownNode node) where TDisallowed : MarkdownNode + { + if (node is TextMarkdownNode) return; + if (node is IMarkdownNodeWithChildren nodeWithChildren) + { + if (nodeWithChildren is TDisallowed) + nodeWithChildren.Children.Should().NotBeEmpty(); + foreach (var child in nodeWithChildren.Children) + AstNotHaveEmpty(child); + } + else throw new ArgumentException($"Not expected node type: {node.Type}"); + } + + private int GetAstDepth(MarkdownNode node, int level = 1) + { + var maxLevel = level; + if (node is not IMarkdownNodeWithChildren nodeWithChildren) return maxLevel; + foreach (var child in nodeWithChildren.Children) + maxLevel = Math.Max(level, GetAstDepth(child, level + 1)); + return maxLevel; + } + + private void CheckNesting(MarkdownNode node) + { + if (node is TextMarkdownNode) return; + if (node is IMarkdownNodeWithChildren nodeWithChildren) + { + var childrenTypes = nodeWithChildren.Children.Select(n => n.Type).ToHashSet(); + var allowedTypes = GetAllowedChildrenFor(node); + + foreach (var type in childrenTypes) + allowedTypes.Should().Contain(type); + + foreach (var child in nodeWithChildren.Children) + CheckNesting(child); + } + else throw new ArgumentException($"Not expected node type: {node.Type}"); + } + + private MarkdownNodeName[] GetAllowedChildrenFor(MarkdownNode node) + { + return node switch + { + RootMarkdownNode => + [MarkdownNodeName.Bold, MarkdownNodeName.Italic, MarkdownNodeName.Text, MarkdownNodeName.Heading], + HeadingMarkdownNode => [MarkdownNodeName.Bold, MarkdownNodeName.Italic, MarkdownNodeName.Text], + BoldMarkdownNode => [MarkdownNodeName.Italic, MarkdownNodeName.Text], + ItalicMarkdownNode => [MarkdownNodeName.Text], + TextMarkdownNode => [], + _ => throw new ArgumentException($"Not expected node type: {node.Type}") + }; + } +} \ No newline at end of file diff --git a/cs/MarkdownTests/TestsData/expected.txt b/cs/MarkdownTests/TestsData/expected.txt new file mode 100644 index 000000000..350d75b40 --- /dev/null +++ b/cs/MarkdownTests/TestsData/expected.txt @@ -0,0 +1,73 @@ +

Спецификация языка разметки

+ +Посмотрите этот файл в сыром виде. Сравните с тем, что показывает github. +Все совпадения случайны ;) + + + +

Курсив

+ +Текст, окруженный с двух сторон одинарными символами подчерка, +должен помещаться в HTML-тег \ вот так: + +Текст, \окруженный с двух сторон\ одинарными символами подчерка, +должен помещаться в HTML-тег \. + + + +

Полужирный

+ +Выделенный двумя символами текст должен становиться полужирным с помощью тега \. + + + +

Экранирование

+ +Любой символ можно экранировать, чтобы он не считался частью разметки. +_Вот это_, не должно выделиться тегом \. + +Символ экранирования исчезает из результата, только если экранирует что-то. +Здесь сим\волы экранирования\ \должны остаться. + +Символ экранирования тоже можно экранировать: \вот это будет выделено тегом \ + + + +

Взаимодействие тегов

+ +Внутри двойного выделения одинарное тоже работает. + +Но не наоборот — внутри одинарного __двойное__ не работает. + +Подчерки внутри текста c цифрами_12_3 не считаются выделением и должны оставаться символами подчерка. + +Однако выделять часть слова они могут: и в начале, и в середине, и в конце. + +В то же время выделение в ра_зных сл_овах не работает. + +__Непарные_ символы в рамках одного абзаца не считаются выделением. + +За подчерками, начинающими выделение, должен следовать непробельный символ. Иначе эти_ подчерки_ не считаются выделением +и остаются просто символами подчерка. + +Подчерки, заканчивающие выделение, должны следовать за непробельным символом. Иначе эти подчерки _не считаются окончанием выделения +и остаются просто символами подчерка. + +В случае __пересечения _двойных__ и одинарных_ подчерков ни один из них не считается выделением. + +Если внутри подчерков пустая строка ____, то они остаются символами подчерка. + + + +

Заголовки

+ +Абзац, начинающийся с "# ", выделяется тегом \

в заголовок. +В тексте заголовка могут присутствовать все прочие символы разметки с указанными правилами. + +Таким образом + +

Заголовок с разными символами

+ +превратится в: + +\

Заголовок \с \разными\ символами\\

\ No newline at end of file diff --git a/cs/MarkdownTests/TestsData/test.txt b/cs/MarkdownTests/TestsData/test.txt new file mode 100644 index 000000000..886e99c95 --- /dev/null +++ b/cs/MarkdownTests/TestsData/test.txt @@ -0,0 +1,73 @@ +# Спецификация языка разметки + +Посмотрите этот файл в сыром виде. Сравните с тем, что показывает github. +Все совпадения случайны ;) + + + +# Курсив + +Текст, _окруженный с двух сторон_ одинарными символами подчерка, +должен помещаться в HTML-тег \ вот так: + +Текст, \окруженный с двух сторон\ одинарными символами подчерка, +должен помещаться в HTML-тег \. + + + +# Полужирный + +__Выделенный двумя символами текст__ должен становиться полужирным с помощью тега \. + + + +# Экранирование + +Любой символ можно экранировать, чтобы он не считался частью разметки. +\_Вот это\_, не должно выделиться тегом \. + +Символ экранирования исчезает из результата, только если экранирует что-то. +Здесь сим\волы экранирования\ \должны остаться.\ + +Символ экранирования тоже можно экранировать: \\_вот это будет выделено тегом_ \ + + + +# Взаимодействие тегов + +Внутри __двойного выделения _одинарное_ тоже__ работает. + +Но не наоборот — внутри _одинарного __двойное__ не_ работает. + +Подчерки внутри текста c цифрами_12_3 не считаются выделением и должны оставаться символами подчерка. + +Однако выделять часть слова они могут: и в _нач_але, и в сер_еди_не, и в кон_це._ + +В то же время выделение в ра_зных сл_овах не работает. + +__Непарные_ символы в рамках одного абзаца не считаются выделением. + +За подчерками, начинающими выделение, должен следовать непробельный символ. Иначе эти_ подчерки_ не считаются выделением +и остаются просто символами подчерка. + +Подчерки, заканчивающие выделение, должны следовать за непробельным символом. Иначе эти _подчерки _не считаются_ окончанием выделения +и остаются просто символами подчерка. + +В случае __пересечения _двойных__ и одинарных_ подчерков ни один из них не считается выделением. + +Если внутри подчерков пустая строка ____, то они остаются символами подчерка. + + + +# Заголовки + +Абзац, начинающийся с "# ", выделяется тегом \

в заголовок. +В тексте заголовка могут присутствовать все прочие символы разметки с указанными правилами. + +Таким образом + +# Заголовок __с _разными_ символами__ + +превратится в: + +\

Заголовок \с \разными\ символами\\

\ No newline at end of file From d932240cebc17a5387381d3f7d517aec959d5e90 Mon Sep 17 00:00:00 2001 From: Maxim Mineev Date: Tue, 10 Dec 2024 18:49:41 +0500 Subject: [PATCH 5/7] =?UTF-8?q?=D0=98=D0=B7=D0=B1=D0=B0=D0=B2=D0=B8=D0=BB?= =?UTF-8?q?=D1=81=D1=8F=20=D0=BE=D1=82=20IToken=20=D0=B8=20HetIndexToNextT?= =?UTF-8?q?oken.=20=D0=A1=D0=B4=D0=B5=D0=BB=D0=B0=D0=BB=20Markdown=20?= =?UTF-8?q?=D0=B1=D0=B8=D0=B1=D0=BB=D0=B8=D0=BE=D1=82=D0=B5=D0=BA=D0=BE?= =?UTF-8?q?=D0=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cs/Markdown/Interfaces/ILexer.cs | 4 +++- cs/Markdown/Interfaces/IParser.cs | 3 ++- cs/Markdown/Interfaces/IToken.cs | 13 ------------- cs/Markdown/Markdown.csproj | 1 - cs/Markdown/MarkdownLexer.cs | 6 +++--- cs/Markdown/MarkdownParser.cs | 20 ++++++++++---------- cs/Markdown/MarkdownToHtmlConverter.cs | 4 ++-- cs/Markdown/Program.cs | 9 --------- cs/Markdown/Tokens/Token.cs | 3 +-- cs/MarkdownTests/LexerTests.cs | 12 ++++++------ cs/MarkdownTests/ParserTests.cs | 1 - cs/clean-code.sln.DotSettings | 3 +++ 12 files changed, 30 insertions(+), 49 deletions(-) delete mode 100644 cs/Markdown/Interfaces/IToken.cs delete mode 100644 cs/Markdown/Program.cs diff --git a/cs/Markdown/Interfaces/ILexer.cs b/cs/Markdown/Interfaces/ILexer.cs index f1d4032ea..907fbd784 100644 --- a/cs/Markdown/Interfaces/ILexer.cs +++ b/cs/Markdown/Interfaces/ILexer.cs @@ -1,6 +1,8 @@ +using Markdown.Tokens; + namespace Markdown; public interface ILexer { - List Tokenize(string input); + List Tokenize(string input); } \ No newline at end of file diff --git a/cs/Markdown/Interfaces/IParser.cs b/cs/Markdown/Interfaces/IParser.cs index 5cf7a0621..63d820524 100644 --- a/cs/Markdown/Interfaces/IParser.cs +++ b/cs/Markdown/Interfaces/IParser.cs @@ -1,8 +1,9 @@ using Markdown.AstNodes; +using Markdown.Tokens; namespace Markdown; public interface IParser { - RootMarkdownNode Parse(List tokens); + RootMarkdownNode Parse(List tokens); } \ No newline at end of file diff --git a/cs/Markdown/Interfaces/IToken.cs b/cs/Markdown/Interfaces/IToken.cs deleted file mode 100644 index 9404c8c60..000000000 --- a/cs/Markdown/Interfaces/IToken.cs +++ /dev/null @@ -1,13 +0,0 @@ -using Markdown.Enums; - -namespace Markdown; - -public interface IToken -{ - MarkdownTokenName Name { get; } - int Position { get; } - int Length { get; } - string Value { get; } - int GetIndexToNextToken(); - bool Is(MarkdownTokenName type); -} \ No newline at end of file diff --git a/cs/Markdown/Markdown.csproj b/cs/Markdown/Markdown.csproj index 2f4fc7765..3a6353295 100644 --- a/cs/Markdown/Markdown.csproj +++ b/cs/Markdown/Markdown.csproj @@ -1,7 +1,6 @@  - Exe net8.0 enable enable diff --git a/cs/Markdown/MarkdownLexer.cs b/cs/Markdown/MarkdownLexer.cs index 6886885a0..3ffb3b1a9 100644 --- a/cs/Markdown/MarkdownLexer.cs +++ b/cs/Markdown/MarkdownLexer.cs @@ -6,7 +6,7 @@ namespace Markdown; public class MarkdownLexer : ILexer { private int position; - private readonly List tokens = []; + private readonly List tokens = []; private const string DoubleGround = "__"; private const string Ground = "_"; private const string Escape = "\\"; @@ -17,7 +17,7 @@ public class MarkdownLexer : ILexer private const char SpaceChar = ' '; private readonly char[] escapedChars = [SharpChar, GroundChar, EscapeChar, NewLineChar]; - public List Tokenize(string input) + public List Tokenize(string input) { position = 0; var nestingStack = new Stack(); @@ -55,7 +55,7 @@ public List Tokenize(string input) private void ParseHeadingAndAdvance(string input) { if (NextIsSpace(input) && IsStartOfParagraph(input)) tokens.Add(new HeadingToken(position++)); - else tokens.Add(new TextToken(position, "#")); + else tokens.Add(new TextToken(position, $"{SharpChar}")); position++; } diff --git a/cs/Markdown/MarkdownParser.cs b/cs/Markdown/MarkdownParser.cs index 28ad8275b..b9318da7d 100644 --- a/cs/Markdown/MarkdownParser.cs +++ b/cs/Markdown/MarkdownParser.cs @@ -14,14 +14,14 @@ public class MarkdownParser : IParser private const MarkdownTokenName NewLine = MarkdownTokenName.NewLine; private const MarkdownTokenName Space = MarkdownTokenName.Space; - public RootMarkdownNode Parse(List tokens) + public RootMarkdownNode Parse(List tokens) { var root = new RootMarkdownNode(); ParseChildren(tokens, root, 0, tokens.Count); return root; } - private void ParseChildren(List tokens, IMarkdownNodeWithChildren parent, int left, int right) + private void ParseChildren(List tokens, IMarkdownNodeWithChildren parent, int left, int right) { if (left < 0 || right > tokens.Count) return; if (left >= right) return; @@ -61,7 +61,7 @@ private void ParseChildren(List tokens, IMarkdownNodeWithChildren parent } } - private int ParseItalicWithChildren(List tokens, IMarkdownNodeWithChildren parent, int start, int right) + private int ParseItalicWithChildren(List tokens, IMarkdownNodeWithChildren parent, int start, int right) { var italic = new ItalicMarkdownNode(); var next = FindIndexOfCloseItalicToken(tokens, start); @@ -100,7 +100,7 @@ private int ParseItalicWithChildren(List tokens, IMarkdownNodeWithChildr return next + 1; } - private int ParseBoldWithChildren(List tokens, IMarkdownNodeWithChildren parent, int i) + private int ParseBoldWithChildren(List tokens, IMarkdownNodeWithChildren parent, int i) { var bold = new BoldMarkdownNode(); var next = FindIndexOfCloseBoldToken(tokens, i); @@ -134,7 +134,7 @@ private int ParseBoldWithChildren(List tokens, IMarkdownNodeWithChildren return next + 1; } - private int FindIndexOfCloseItalicToken(List tokens, int start) + private int FindIndexOfCloseItalicToken(List tokens, int start) { var index = start + 1; if (index < tokens.Count && tokens[index].Is(Space)) return -1; @@ -159,7 +159,7 @@ private int FindIndexOfCloseItalicToken(List tokens, int start) return -1; } - private int FindIndexOfCloseBoldToken(List tokens, int start) + private int FindIndexOfCloseBoldToken(List tokens, int start) { var index = start + 1; if (index >= tokens.Count || tokens[index].Is(Space)) return -1; @@ -173,7 +173,7 @@ private int FindIndexOfCloseBoldToken(List tokens, int start) return -1; } - private int FindIndexOfCloseHeadingToken(List tokens, int start) + private int FindIndexOfCloseHeadingToken(List tokens, int start) { var index = start; while (index < tokens.Count && !tokens[index].Is(NewLine)) @@ -181,7 +181,7 @@ private int FindIndexOfCloseHeadingToken(List tokens, int start) return index == tokens.Count ? -1 : index; } - private (int start, int end) FindIndexOfIntersection(List tokens, int left, int right) + private (int start, int end) FindIndexOfIntersection(List tokens, int left, int right) { for (var i = left; i < right; i++) if (tokens[i] is ItalicToken) @@ -195,11 +195,11 @@ private int FindIndexOfCloseHeadingToken(List tokens, int start) return (-1, -1); } - private bool TokenInWord(List tokens, int index) + private bool TokenInWord(List tokens, int index) => index > 0 && tokens[index - 1].Is(Text) && index + 1 < tokens.Count && tokens[index + 1].Is(Text); - private bool ContainsToken(List tokens, MarkdownTokenName expected, int left, int right) + private bool ContainsToken(List tokens, MarkdownTokenName expected, int left, int right) { for (var i = left; i < right; i++) if (tokens[i].Is(expected)) diff --git a/cs/Markdown/MarkdownToHtmlConverter.cs b/cs/Markdown/MarkdownToHtmlConverter.cs index 33a47147a..ac909b9ef 100644 --- a/cs/Markdown/MarkdownToHtmlConverter.cs +++ b/cs/Markdown/MarkdownToHtmlConverter.cs @@ -5,8 +5,8 @@ namespace Markdown; public class MarkdownToHtmlConverter(ILexer lexer, IParser parser) { - public ILexer Lexer { get; } = lexer; - public IParser Parser { get; } = parser; + private ILexer Lexer { get; } = lexer; + private IParser Parser { get; } = parser; public string Convert(string input) { diff --git a/cs/Markdown/Program.cs b/cs/Markdown/Program.cs deleted file mode 100644 index 525eda88d..000000000 --- a/cs/Markdown/Program.cs +++ /dev/null @@ -1,9 +0,0 @@ -namespace Markdown; - -class Program -{ - static void Main(string[] args) - { - Console.WriteLine("Hello, World!"); - } -} \ No newline at end of file diff --git a/cs/Markdown/Tokens/Token.cs b/cs/Markdown/Tokens/Token.cs index 6057e9359..4833b081d 100644 --- a/cs/Markdown/Tokens/Token.cs +++ b/cs/Markdown/Tokens/Token.cs @@ -2,13 +2,12 @@ namespace Markdown.Tokens; -public abstract class Token(int position) : IToken +public abstract class Token(int position) { public abstract MarkdownTokenName Name { get; } public abstract string Value { get; } public int Position => position; public int Length => Value.Length; - public int GetIndexToNextToken() => Position + Length; public bool Is(MarkdownTokenName type) => type == Name; public override bool Equals(object? obj) => obj is Token token && Equals(token); diff --git a/cs/MarkdownTests/LexerTests.cs b/cs/MarkdownTests/LexerTests.cs index 6199bae31..3bcdf060d 100644 --- a/cs/MarkdownTests/LexerTests.cs +++ b/cs/MarkdownTests/LexerTests.cs @@ -16,7 +16,7 @@ public class LexerTests public void Tokenize_WorksCorrect_WhenItalic() { const string text = "_italic_"; - var expected = new IToken[] { new ItalicToken(0), new TextToken(1, "italic"), new ItalicToken(7) }; + var expected = new Token[] { new ItalicToken(0), new TextToken(1, "italic"), new ItalicToken(7) }; var actual = lexer.Tokenize(text); actual.Should().BeEquivalentTo(expected, o => o.WithStrictOrdering()); } @@ -25,7 +25,7 @@ public void Tokenize_WorksCorrect_WhenItalic() public void Tokenize_WorksCorrect_WhenBold() { const string text = "__bold__"; - var expected = new IToken[] { new BoldToken(0), new TextToken(2, "bold"), new BoldToken(6) }; + var expected = new Token[] { new BoldToken(0), new TextToken(2, "bold"), new BoldToken(6) }; var actual = lexer.Tokenize(text); actual.Should().BeEquivalentTo(expected, o => o.WithStrictOrdering()); } @@ -34,7 +34,7 @@ public void Tokenize_WorksCorrect_WhenBold() public void Tokenize_WorksCorrect_WhenHeadingWithoutCloseTag() { const string text = "# heading"; - var expected = new IToken[] { new HeadingToken(0), new TextToken(2, "heading") }; + var expected = new Token[] { new HeadingToken(0), new TextToken(2, "heading") }; var actual = lexer.Tokenize(text); actual.Should().BeEquivalentTo(expected, o => o.WithStrictOrdering()); } @@ -43,7 +43,7 @@ public void Tokenize_WorksCorrect_WhenHeadingWithoutCloseTag() public void Tokenize_WorksCorrect_WhenHeadingWithCloseTag() { const string text = "# heading\ntext"; - var expected = new IToken[] + var expected = new Token[] { new HeadingToken(0), new TextToken(2, "heading"), new NewLineToken(9), new TextToken(10, "text") }; @@ -55,7 +55,7 @@ public void Tokenize_WorksCorrect_WhenHeadingWithCloseTag() public void Tokenize_WorksCorrect_WithItalicInBold() { const string text = "__bold _italic___"; - var expected = new IToken[] + var expected = new Token[] { new BoldToken(0), new TextToken(2, "bold"), new SpaceToken(6), new ItalicToken(7), new TextToken(8, "italic"), new ItalicToken(14), new BoldToken(15) @@ -68,7 +68,7 @@ public void Tokenize_WorksCorrect_WithItalicInBold() public void Tokenize_WorksCorrect_WithAllTags() { const string text = "# a b\\_c _d_ __e__\n___f___ 1_234"; - var expected = new IToken[] + var expected = new Token[] { new HeadingToken(0), new TextToken(2, "a"), diff --git a/cs/MarkdownTests/ParserTests.cs b/cs/MarkdownTests/ParserTests.cs index a69b41f56..441754fd4 100644 --- a/cs/MarkdownTests/ParserTests.cs +++ b/cs/MarkdownTests/ParserTests.cs @@ -2,7 +2,6 @@ using Markdown; using Markdown.AstNodes; using Markdown.Enums; -using Markdown.Tokens; using NUnit.Framework; namespace MarkdownTests; diff --git a/cs/clean-code.sln.DotSettings b/cs/clean-code.sln.DotSettings index 135b83ecb..229f449d2 100644 --- a/cs/clean-code.sln.DotSettings +++ b/cs/clean-code.sln.DotSettings @@ -1,6 +1,9 @@  <Policy Inspect="True" Prefix="" Suffix="" Style="aaBb" /> <Policy Inspect="True" Prefix="" Suffix="" Style="AaBb_AaBb" /> + <Policy><Descriptor Staticness="Instance" AccessRightKinds="Private" Description="Instance fields (private)"><ElementKinds><Kind Name="FIELD" /><Kind Name="READONLY_FIELD" /></ElementKinds></Descriptor><Policy Inspect="True" Prefix="" Suffix="" Style="aaBb" /></Policy> + <Policy><Descriptor Staticness="Any" AccessRightKinds="Any" Description="Types and namespaces"><ElementKinds><Kind Name="NAMESPACE" /><Kind Name="CLASS" /><Kind Name="STRUCT" /><Kind Name="ENUM" /><Kind Name="DELEGATE" /></ElementKinds></Descriptor><Policy Inspect="True" Prefix="" Suffix="" Style="AaBb_AaBb" /></Policy> + True True True Imported 10.10.2016 From 4490d82228483adb4725a55735ce307f73f09508 Mon Sep 17 00:00:00 2001 From: Maxim Mineev Date: Tue, 10 Dec 2024 18:55:07 +0500 Subject: [PATCH 6/7] =?UTF-8?q?=D0=92=D1=8B=D0=BD=D0=B5=D1=81=20=D0=BA?= =?UTF-8?q?=D0=BE=D0=BD=D1=81=D1=82=D0=B0=D0=BD=D1=82=D1=8B=20=D0=B4=D0=BB?= =?UTF-8?q?=D1=8F=20=D0=BF=D0=B0=D1=80=D1=81=D0=B5=D1=80=D0=B0=20=D0=B8=20?= =?UTF-8?q?=D0=BB=D0=B5=D0=BA=D1=81=D0=B5=D1=80=D0=B0=20=D0=B2=20=D0=BE?= =?UTF-8?q?=D1=82=D0=B4=D0=B5=D0=BB=D1=8C=D0=BD=D1=8B=D0=B9=20=D0=BA=D0=BB?= =?UTF-8?q?=D0=B0=D1=81=D1=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cs/Markdown/MarkdownLexer.cs | 69 ++++++++++++++++++---------------- cs/Markdown/MarkdownParser.cs | 26 ++++++------- cs/Markdown/MarkdownSymbols.cs | 14 +++++++ 3 files changed, 63 insertions(+), 46 deletions(-) create mode 100644 cs/Markdown/MarkdownSymbols.cs diff --git a/cs/Markdown/MarkdownLexer.cs b/cs/Markdown/MarkdownLexer.cs index 3ffb3b1a9..244946309 100644 --- a/cs/Markdown/MarkdownLexer.cs +++ b/cs/Markdown/MarkdownLexer.cs @@ -7,15 +7,11 @@ public class MarkdownLexer : ILexer { private int position; private readonly List tokens = []; - private const string DoubleGround = "__"; - private const string Ground = "_"; - private const string Escape = "\\"; - private const char GroundChar = '_'; - private const char SharpChar = '#'; - private const char EscapeChar = '\\'; - private const char NewLineChar = '\n'; - private const char SpaceChar = ' '; - private readonly char[] escapedChars = [SharpChar, GroundChar, EscapeChar, NewLineChar]; + + private readonly char[] escapedChars = + [ + MarkdownSymbols.SharpChar, MarkdownSymbols.GroundChar, MarkdownSymbols.EscapeChar, MarkdownSymbols.NewLineChar + ]; public List Tokenize(string input) { @@ -26,19 +22,19 @@ public List Tokenize(string input) { switch (input[position]) { - case SpaceChar: + case MarkdownSymbols.SpaceChar: ParseSpaceAndAdvance(); break; - case NewLineChar: + case MarkdownSymbols.NewLineChar: ParseNewLineAndAdvance(nestingStack); break; - case EscapeChar: + case MarkdownSymbols.EscapeChar: ParseEscapeAndAdvance(input); break; - case GroundChar: + case MarkdownSymbols.GroundChar: ParseItalicOrBoldAndAdvance(input, nestingStack); break; - case SharpChar: + case MarkdownSymbols.SharpChar: ParseHeadingAndAdvance(input); break; default: @@ -55,7 +51,7 @@ public List Tokenize(string input) private void ParseHeadingAndAdvance(string input) { if (NextIsSpace(input) && IsStartOfParagraph(input)) tokens.Add(new HeadingToken(position++)); - else tokens.Add(new TextToken(position, $"{SharpChar}")); + else tokens.Add(new TextToken(position, MarkdownSymbols.Sharp)); position++; } @@ -63,7 +59,11 @@ private void ParseTextAndAdvance(string input) { var value = new StringBuilder(); var start = position; - var endChars = new[] { SharpChar, GroundChar, NewLineChar, EscapeChar, SpaceChar }; + var endChars = new[] + { + MarkdownSymbols.SharpChar, MarkdownSymbols.GroundChar, MarkdownSymbols.NewLineChar, + MarkdownSymbols.EscapeChar, MarkdownSymbols.SpaceChar + }; while (position < input.Length && !endChars.Contains(input[position]) && !CurrentIsDigit(input)) value.Append(input[position++]); @@ -76,7 +76,7 @@ private void ParseNumberAndAdvance(string input) { var sb = new StringBuilder(); var start = position; - while (position < input.Length && (CurrentIsDigit(input) || input[position] == GroundChar)) + while (position < input.Length && (CurrentIsDigit(input) || input[position] == MarkdownSymbols.GroundChar)) sb.Append(input[position++]); tokens.Add(new NumberToken(start, sb.ToString())); } @@ -98,15 +98,15 @@ private void ParseItalicOrBoldAndAdvanceWhenStackEmpty(bool isSingleGround, bool if (isSingleGround) { ParseItalicAndAdvance(); - stack.Push(Ground); + stack.Push(MarkdownSymbols.Ground); return; } ParseBoldAndAdvance(); - stack.Push(DoubleGround); + stack.Push(MarkdownSymbols.DoubleGround); if (!isTripleGround) return; ParseItalicAndAdvance(); - stack.Push(Ground); + stack.Push(MarkdownSymbols.Ground); } private void ParseItalicOrBoldAndAdvanceWhenStackHasOne(bool isSingleGround, bool isDoubleGround, @@ -115,18 +115,18 @@ private void ParseItalicOrBoldAndAdvanceWhenStackHasOne(bool isSingleGround, boo { switch (stack.Peek()) { - case DoubleGround when isSingleGround: + case MarkdownSymbols.DoubleGround when isSingleGround: ParseItalicAndAdvance(); - stack.Push(Ground); + stack.Push(MarkdownSymbols.Ground); break; - case DoubleGround: + case MarkdownSymbols.DoubleGround: { if (isTripleGround) ParseItalicAndAdvance(); ParseBoldAndAdvance(); stack.Pop(); break; } - case Ground: + case MarkdownSymbols.Ground: { if (isTripleGround) { @@ -135,7 +135,7 @@ private void ParseItalicOrBoldAndAdvanceWhenStackHasOne(bool isSingleGround, boo } else if (isDoubleGround) { - tokens.Add(new TextToken(position, DoubleGround)); + tokens.Add(new TextToken(position, MarkdownSymbols.DoubleGround)); position += 2; } else ParseItalicAndAdvance(); @@ -186,13 +186,13 @@ private void ParseEscapeAndAdvance(string input) { if (position + 1 >= input.Length) { - tokens.Add(new TextToken(position++, Escape)); + tokens.Add(new TextToken(position++, MarkdownSymbols.Escape)); return; } if (NextIsDoubleGround(input)) { - tokens.Add(new TextToken(position, DoubleGround)); + tokens.Add(new TextToken(position, MarkdownSymbols.DoubleGround)); position += 3; return; } @@ -200,17 +200,22 @@ private void ParseEscapeAndAdvance(string input) var next = input[position + 1]; tokens.Add(escapedChars.Contains(next) ? new TextToken(position, next.ToString()) - : new TextToken(position, Escape + next)); + : new TextToken(position, MarkdownSymbols.Escape + next)); position += 2; } private bool NextIsDoubleGround(string input) => - position + 2 < input.Length && input[position + 1] == GroundChar && input[position + 2] == GroundChar; + position + 2 < input.Length && input[position + 1] == MarkdownSymbols.GroundChar && + input[position + 2] == MarkdownSymbols.GroundChar; + + private bool NextIsSpace(string input) => + position + 1 < input.Length && input[position + 1] == MarkdownSymbols.SpaceChar; + + private bool NextIsGround(string input) => + position + 1 < input.Length && input[position + 1] == MarkdownSymbols.GroundChar; - private bool NextIsSpace(string input) => position + 1 < input.Length && input[position + 1] == SpaceChar; - private bool NextIsGround(string input) => position + 1 < input.Length && input[position + 1] == GroundChar; private bool CurrentIsDigit(string input) => char.IsDigit(input[position]); private bool IsStartOfParagraph(string input) => - position == 0 || position > 0 && input[position - 1] == NewLineChar; + position == 0 || position > 0 && input[position - 1] == MarkdownSymbols.NewLineChar; } \ No newline at end of file diff --git a/cs/Markdown/MarkdownParser.cs b/cs/Markdown/MarkdownParser.cs index b9318da7d..e65728db6 100644 --- a/cs/Markdown/MarkdownParser.cs +++ b/cs/Markdown/MarkdownParser.cs @@ -6,8 +6,6 @@ namespace Markdown; public class MarkdownParser : IParser { - private const string DoubleGround = "__"; - private const string Ground = "_"; private const MarkdownTokenName Text = MarkdownTokenName.Text; private const MarkdownTokenName Bold = MarkdownTokenName.Bold; private const MarkdownTokenName Italic = MarkdownTokenName.Italic; @@ -68,31 +66,31 @@ private int ParseItalicWithChildren(List tokens, IMarkdownNodeWithChildre if (next == -1 || next >= right) { - parent.Children.Add(new TextMarkdownNode(Ground)); + parent.Children.Add(new TextMarkdownNode(MarkdownSymbols.Ground)); return start + 1; } if (parent is ItalicMarkdownNode) { - parent.Children.Add(new TextMarkdownNode(Ground)); + parent.Children.Add(new TextMarkdownNode(MarkdownSymbols.Ground)); ParseChildren(tokens, parent, start + 1, next); - parent.Children.Add(new TextMarkdownNode(Ground)); + parent.Children.Add(new TextMarkdownNode(MarkdownSymbols.Ground)); return next + 1; } if (TokenInWord(tokens, start) && TokenInWord(tokens, next) && ContainsToken(tokens, MarkdownTokenName.Space, start, next)) { - parent.Children.Add(new TextMarkdownNode(Ground)); + parent.Children.Add(new TextMarkdownNode(MarkdownSymbols.Ground)); for (var j = start + 1; j < next; j++) parent.Children.Add(new TextMarkdownNode(tokens[j].Value)); - parent.Children.Add(new TextMarkdownNode(Ground)); + parent.Children.Add(new TextMarkdownNode(MarkdownSymbols.Ground)); return next + 1; } ParseChildren(tokens, italic, start + 1, next); if (italic.Children.Count == 0) { - parent.Children.Add(new TextMarkdownNode(Ground + Ground)); + parent.Children.Add(new TextMarkdownNode(MarkdownSymbols.Ground + MarkdownSymbols.Ground)); return start + 2; } @@ -106,27 +104,27 @@ private int ParseBoldWithChildren(List tokens, IMarkdownNodeWithChildren var next = FindIndexOfCloseBoldToken(tokens, i); if (next == -1 || parent is ItalicMarkdownNode) { - parent.Children.Add(new TextMarkdownNode(DoubleGround)); + parent.Children.Add(new TextMarkdownNode(MarkdownSymbols.DoubleGround)); return i + 1; } var indexOfIntersection = FindIndexOfIntersection(tokens, i + 1, next); if (indexOfIntersection.start > 0) { - parent.Children.Add(new TextMarkdownNode(DoubleGround)); + parent.Children.Add(new TextMarkdownNode(MarkdownSymbols.DoubleGround)); ParseChildren(tokens, parent, i + 1, indexOfIntersection.start); - parent.Children.Add(new TextMarkdownNode(Ground)); + parent.Children.Add(new TextMarkdownNode(MarkdownSymbols.Ground)); ParseChildren(tokens, parent, indexOfIntersection.start + 1, next); - parent.Children.Add(new TextMarkdownNode(DoubleGround)); + parent.Children.Add(new TextMarkdownNode(MarkdownSymbols.DoubleGround)); ParseChildren(tokens, parent, next + 1, indexOfIntersection.end); - parent.Children.Add(new TextMarkdownNode(Ground)); + parent.Children.Add(new TextMarkdownNode(MarkdownSymbols.Ground)); return indexOfIntersection.end + 1; } ParseChildren(tokens, bold, i + 1, next); if (bold.Children.Count == 0) { - parent.Children.Add(new TextMarkdownNode(DoubleGround + DoubleGround)); + parent.Children.Add(new TextMarkdownNode(MarkdownSymbols.DoubleGround + MarkdownSymbols.DoubleGround)); return i + 2; } diff --git a/cs/Markdown/MarkdownSymbols.cs b/cs/Markdown/MarkdownSymbols.cs new file mode 100644 index 000000000..0b4132f04 --- /dev/null +++ b/cs/Markdown/MarkdownSymbols.cs @@ -0,0 +1,14 @@ +namespace Markdown; + +public static class MarkdownSymbols +{ + public const string DoubleGround = "__"; + public const string Ground = "_"; + public const string Sharp = "#"; + public const string Escape = "\\"; + public const char GroundChar = '_'; + public const char SharpChar = '#'; + public const char EscapeChar = '\\'; + public const char NewLineChar = '\n'; + public const char SpaceChar = ' '; +} \ No newline at end of file From cdc67f44a7d4984ee347394f3c9ef0a63514e4b4 Mon Sep 17 00:00:00 2001 From: Maxim Mineev Date: Tue, 10 Dec 2024 19:23:32 +0500 Subject: [PATCH 7/7] =?UTF-8?q?=D0=92=D1=8B=D0=BD=D0=B5=D1=81=20input=20?= =?UTF-8?q?=D0=BB=D0=B5=D0=BA=D1=81=D0=B5=D1=80=D0=B0=20=D0=B2=20ValueObje?= =?UTF-8?q?ct?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cs/Markdown/MarkdownLexer.cs | 43 +++++++++++-------------------- cs/Markdown/MarkdownLexerInput.cs | 22 ++++++++++++++++ 2 files changed, 37 insertions(+), 28 deletions(-) create mode 100644 cs/Markdown/MarkdownLexerInput.cs diff --git a/cs/Markdown/MarkdownLexer.cs b/cs/Markdown/MarkdownLexer.cs index 244946309..dfb54f4e9 100644 --- a/cs/Markdown/MarkdownLexer.cs +++ b/cs/Markdown/MarkdownLexer.cs @@ -13,7 +13,9 @@ public class MarkdownLexer : ILexer MarkdownSymbols.SharpChar, MarkdownSymbols.GroundChar, MarkdownSymbols.EscapeChar, MarkdownSymbols.NewLineChar ]; - public List Tokenize(string input) + public List Tokenize(string input) => Tokenize(new MarkdownLexerInput(input)); + + private List Tokenize(MarkdownLexerInput input) { position = 0; var nestingStack = new Stack(); @@ -48,14 +50,14 @@ public List Tokenize(string input) private void ParseSpaceAndAdvance() => tokens.Add(new SpaceToken(position++)); - private void ParseHeadingAndAdvance(string input) + private void ParseHeadingAndAdvance(MarkdownLexerInput input) { - if (NextIsSpace(input) && IsStartOfParagraph(input)) tokens.Add(new HeadingToken(position++)); + if (input.NextIsSpace(position) && input.IsStartOfParagraph(position)) tokens.Add(new HeadingToken(position++)); else tokens.Add(new TextToken(position, MarkdownSymbols.Sharp)); position++; } - private void ParseTextAndAdvance(string input) + private void ParseTextAndAdvance(MarkdownLexerInput input) { var value = new StringBuilder(); var start = position; @@ -64,27 +66,27 @@ private void ParseTextAndAdvance(string input) MarkdownSymbols.SharpChar, MarkdownSymbols.GroundChar, MarkdownSymbols.NewLineChar, MarkdownSymbols.EscapeChar, MarkdownSymbols.SpaceChar }; - while (position < input.Length && !endChars.Contains(input[position]) && !CurrentIsDigit(input)) + while (position < input.Length && !endChars.Contains(input[position]) && !input.CurrentIsDigit(position)) value.Append(input[position++]); if (value.Length > 0) tokens.Add(new TextToken(start, value.ToString())); - if (position < input.Length && CurrentIsDigit(input)) ParseNumberAndAdvance(input); + if (position < input.Length && input.CurrentIsDigit(position)) ParseNumberAndAdvance(input); } - private void ParseNumberAndAdvance(string input) + private void ParseNumberAndAdvance(MarkdownLexerInput input) { var sb = new StringBuilder(); var start = position; - while (position < input.Length && (CurrentIsDigit(input) || input[position] == MarkdownSymbols.GroundChar)) + while (position < input.Length && (input.CurrentIsDigit(position) || input[position] == MarkdownSymbols.GroundChar)) sb.Append(input[position++]); tokens.Add(new NumberToken(start, sb.ToString())); } - private void ParseItalicOrBoldAndAdvance(string input, Stack stack) + private void ParseItalicOrBoldAndAdvance(MarkdownLexerInput input, Stack stack) { - var isDoubleGround = NextIsGround(input); - var isTripleGround = NextIsDoubleGround(input); + var isDoubleGround = input.NextIsGround(position); + var isTripleGround = input.NextIsDoubleGround(position); var isSingleGround = !isTripleGround && !isDoubleGround; if (stack.Count == 0) ParseItalicOrBoldAndAdvanceWhenStackEmpty(isSingleGround, isTripleGround, stack); else if (stack.Count == 1) @@ -182,7 +184,7 @@ private void ParseNewLineAndAdvance(Stack stack) position++; } - private void ParseEscapeAndAdvance(string input) + private void ParseEscapeAndAdvance(MarkdownLexerInput input) { if (position + 1 >= input.Length) { @@ -190,7 +192,7 @@ private void ParseEscapeAndAdvance(string input) return; } - if (NextIsDoubleGround(input)) + if (input.NextIsDoubleGround(position)) { tokens.Add(new TextToken(position, MarkdownSymbols.DoubleGround)); position += 3; @@ -203,19 +205,4 @@ private void ParseEscapeAndAdvance(string input) : new TextToken(position, MarkdownSymbols.Escape + next)); position += 2; } - - private bool NextIsDoubleGround(string input) => - position + 2 < input.Length && input[position + 1] == MarkdownSymbols.GroundChar && - input[position + 2] == MarkdownSymbols.GroundChar; - - private bool NextIsSpace(string input) => - position + 1 < input.Length && input[position + 1] == MarkdownSymbols.SpaceChar; - - private bool NextIsGround(string input) => - position + 1 < input.Length && input[position + 1] == MarkdownSymbols.GroundChar; - - private bool CurrentIsDigit(string input) => char.IsDigit(input[position]); - - private bool IsStartOfParagraph(string input) => - position == 0 || position > 0 && input[position - 1] == MarkdownSymbols.NewLineChar; } \ No newline at end of file diff --git a/cs/Markdown/MarkdownLexerInput.cs b/cs/Markdown/MarkdownLexerInput.cs new file mode 100644 index 000000000..6da318635 --- /dev/null +++ b/cs/Markdown/MarkdownLexerInput.cs @@ -0,0 +1,22 @@ +namespace Markdown; + +public class MarkdownLexerInput(string input) +{ + public bool NextIsDoubleGround(int position) => + position + 2 < input.Length && input[position + 1] == MarkdownSymbols.GroundChar && + input[position + 2] == MarkdownSymbols.GroundChar; + + public bool NextIsSpace(int position) => + position + 1 < input.Length && input[position + 1] == MarkdownSymbols.SpaceChar; + + public bool NextIsGround(int position) => + position + 1 < input.Length && input[position + 1] == MarkdownSymbols.GroundChar; + + public bool CurrentIsDigit(int position) => char.IsDigit(input[position]); + + public bool IsStartOfParagraph(int position) => + position == 0 || position > 0 && input[position - 1] == MarkdownSymbols.NewLineChar; + + public char this[int index] => input[index]; + public int Length => input.Length; +} \ No newline at end of file