diff --git a/cs/Markdown/IMarkdownRenderer.cs b/cs/Markdown/IMarkdownRenderer.cs new file mode 100644 index 000000000..ead0a1aae --- /dev/null +++ b/cs/Markdown/IMarkdownRenderer.cs @@ -0,0 +1,6 @@ +namespace Markdown; + +/* Contract for turning Markdown source text into rendered output. */ public interface IMarkdownRenderer +{ + /* Takes the Markdown text and returns the rendered string. */ string Render(string markdown); +} \ No newline at end of file diff --git a/cs/Markdown/Markdown.csproj b/cs/Markdown/Markdown.csproj new file mode 100644 index 000000000..85278278c --- /dev/null +++ b/cs/Markdown/Markdown.csproj @@ -0,0 +1,15 @@ + + + + net8.0 + enable + enable + + + + + + + + + diff --git a/cs/Markdown/MarkdownRenderer.cs b/cs/Markdown/MarkdownRenderer.cs new file mode 100644 index 000000000..b46a594a1 --- /dev/null +++ b/cs/Markdown/MarkdownRenderer.cs @@ -0,0 +1,32 @@ +using Markdown.Render; +using Markdown.Tokenizer; +using Markdown.Tokenizer.Handlers; +using Markdown.TreeBuilder; + +namespace Markdown; + +/* Default IMarkdownRenderer: tokenizes the input, builds a node tree from the tokens and renders that tree to a string. */ public class MarkdownRenderer : IMarkdownRenderer +{ + private readonly ITreeRenderer treeRenderer = new TreeRenderer(); + private readonly ITokenizer tokenizer; + + /* HandlerManager tries these handlers in list order for every character. */ private readonly List handlers = new() + { + new HeaderHandler(), + new ItalicHandler(), + new BoldHandler(), + }; + + /* Wires one tokenizer instance from the handlers above and a TagProcessor; it is reused by every Render call. */ public MarkdownRenderer() + { + tokenizer = new MarkdownTokenizer(new HandlerManager(handlers), new TagProcessor()); + } + + /* Pipeline: Tokenize, then TreeBuilder.Build, then ITreeRenderer.Render. A new TreeBuilder and NodeFactory are created on each call. */ public string Render(string markdown) + { + var tokens = tokenizer.Tokenize(markdown); + var tree = new TreeBuilder.TreeBuilder(new NodeFactory()).Build(tokens); + + return treeRenderer.Render(tree); + } +} \ No newline at end of file diff --git a/cs/Markdown/Render/ITreeRenderer.cs b/cs/Markdown/Render/ITreeRenderer.cs new file mode 100644 index 000000000..ffcc74676 --- /dev/null +++ b/cs/Markdown/Render/ITreeRenderer.cs @@ -0,0 +1,8 @@ +using Markdown.TreeBuilder.Nodes; + +namespace Markdown.Render; + +/* Renders a node tree to its final string form. */ public interface ITreeRenderer +{ + /* The argument is the root node of the tree; it is named tokens, but TreeRenderer reads its Children. */ string Render(Node tokens); +} \ No newline at end of file diff --git a/cs/Markdown/Render/TreeRenderer.cs b/cs/Markdown/Render/TreeRenderer.cs 
new file mode 100644 index 000000000..74eda954f --- /dev/null +++ b/cs/Markdown/Render/TreeRenderer.cs @@ -0,0 +1,25 @@ +using System.Text; +using Markdown.TreeBuilder.Nodes; + +namespace Markdown.Render; + +/* Renders a node tree to a string by concatenating the output of every child in order. */ public class TreeRenderer : ITreeRenderer +{ + /* Renders only the children of the given node; the tags of the node itself are added by RenderToken. */ public string Render(Node tokens) + { + var sb = new StringBuilder(); + foreach (var token in tokens.Children) + sb.Append(RenderToken(token)); + + return sb.ToString(); + } + + /* A TextNode yields its Value; any other node yields OpenTag, its rendered children and CloseTag (recursing through Render). */ private string? RenderToken(Node node) + { + return node switch + { + TextNode textNode => textNode.Value, + _ => $"{node.OpenTag}{Render(node)}{node.CloseTag}" + }; + } +} \ No newline at end of file diff --git a/cs/Markdown/Tests/Markdown/MarkdownTests.cs b/cs/Markdown/Tests/Markdown/MarkdownTests.cs new file mode 100644 index 000000000..239def901 --- /dev/null +++ b/cs/Markdown/Tests/Markdown/MarkdownTests.cs @@ -0,0 +1,47 @@ +using NUnit.Framework; + +namespace Markdown.Tests.Markdown; + +/* End-to-end tests: each case feeds Markdown into MarkdownRenderer and compares the returned string with the value passed to Returns. */ [TestFixture] +public class MarkdownTests +{ + private IMarkdownRenderer renderer; + + /* A fresh renderer is created before every test case. */ [SetUp] + public void SetUp() + { + renderer = new MarkdownRenderer(); + } + + [TestCaseSource(nameof(MarkdownRendererTestCases))] + public string MarkdownRenderer_Verify(string input) => renderer.Render(input); + + /* NOTE(review): several expected values in this listing carry no markup at all and some string literals are broken across lines - presumably the HTML tags were lost when the diff was extracted; confirm against the real file. */ private static TestCaseData[] MarkdownRendererTestCases = + [ + new TestCaseData("# Header").Returns("

Header

").SetDescription("Простой заголовок."), + new TestCaseData("\\# Header").Returns("# Header").SetDescription("Экранированный заголовок."), + new TestCaseData("\\\\# Header").Returns("\\

Header

").SetDescription("Экранирован экранирования."), + new TestCaseData("_Italic text_").Returns("Italic text").SetDescription("Курсив"), + new TestCaseData("\\_Text_").Returns("_Text_").SetDescription("Экранирование курсива."), + new TestCaseData("\\\\_Italic text_").Returns("\\Italic text") + .SetDescription("Экранирование экранирования курсива."), + new TestCaseData("_Italic text").Returns("_Italic text").SetDescription("Одинокий открывающий тэг."), + new TestCaseData("Italic text_").Returns("Italic text_").SetDescription("Одинокий закрывающий тэг."), + new TestCaseData("Italic_ text_").Returns("Italic_ text_").SetDescription("Два закрывающих тэга."), + new TestCaseData("_Italic _text").Returns("_Italic _text").SetDescription("Два открывающих тэга."), + new TestCaseData("_нач_але").Returns("начале").SetDescription("Курсив в начале слова."), + new TestCaseData("сер_еди_не").Returns("середине").SetDescription("Курсив в середине слова."), + new TestCaseData("кон_це._").Returns("конце.").SetDescription("Курсив в конце слова."), + new TestCaseData("цифры_1_12_3").Returns("цифры_1_12_3").SetDescription("Между цифр - подчерки."), + new TestCaseData("в ра_зных сл_овах не").Returns("в ра_зных сл_овах не") + .SetDescription("В разных словах - не работает."), + new TestCaseData("__bold__").Returns("bold").SetDescription("Полужирный"), + new TestCaseData("_Text__").Returns("_Text__").SetDescription("Разные тэги 1"), + new TestCaseData("__Text_").Returns("__Text_").SetDescription("Разные тэги 2"), + new TestCaseData("__Italic __text").Returns("__Italic __text").SetDescription("Два открывающих тэга."), + new TestCaseData("__два _один_ может__").Returns("два один может") + .SetDescription("Курсив в полужирном."), + new TestCaseData("_одинарного __двойное__ не_").Returns("одинарного __двойное__ не") + .SetDescription("Полужирный в курсиве - не работает."), + ]; +} \ No newline at end of file diff --git a/cs/Markdown/Tests/Tokenizer/BoldHandlerTests.cs 
b/cs/Markdown/Tests/Tokenizer/BoldHandlerTests.cs new file mode 100644 index 000000000..b87cd1505 --- /dev/null +++ b/cs/Markdown/Tests/Tokenizer/BoldHandlerTests.cs @@ -0,0 +1,54 @@ +using FluentAssertions; +using Markdown.Tokenizer; +using Markdown.Tokenizer.Handlers; +using Markdown.Tokenizer.Tags; +using Markdown.TreeBuilder; +using NUnit.Framework; + +namespace Markdown.Tests.Tokenizer; + +/* Tokenizer-level tests for double-underscore (bold) tags, including mixed single/double underscore input. */ [TestFixture] +public class BoldHandlerTests +{ + private ITokenizer tokenizer; + + /* Builds a fresh tokenizer with all three handlers before every test case. */ [SetUp] + public void SetUp() + { + var handlers = new List { new HeaderHandler(), new ItalicHandler(), new BoldHandler() }; + tokenizer = new MarkdownTokenizer(new HandlerManager(handlers), new TagProcessor()); + } + + /* Compares Value and TokenType of each produced token with the expected token at the same index. NOTE(review): TagStatus is not compared and the token count is not asserted, so extra trailing tokens go unnoticed, and fewer tokens than expected fails on the array index instead of on an assertion. */ [TestCaseSource(nameof(BoldTokenSource))] + public void BoldTokenizerTests((string input, Token[] tags) testCase) + { + var tokens = tokenizer.Tokenize(testCase.input).ToArray(); + + for (var i = 0; i < testCase.tags.Length; i++) + { + tokens[i].Value.Should().Be(testCase.tags[i].Value); + tokens[i].TokenType.Should().Be(testCase.tags[i].TokenType); + } + } + + /* Pairs of input text and the expected token sequence. */ public static IEnumerable<(string input, Token[] result)> BoldTokenSource() + { + yield return ("__abc__", [ + new BoldTag(TagStatus.Open), + new TextToken("abc"), + new BoldTag(TagStatus.Closed) + ]); + + yield return ("_abc__", [ + new ItalicTag(TagStatus.Open), + new TextToken("abc"), + new BoldTag(TagStatus.Closed) + ]); + + yield return ("__abc_", [ + new BoldTag(TagStatus.Open), + new TextToken("abc"), + new ItalicTag(TagStatus.Closed) + ]); + } +} \ No newline at end of file diff --git a/cs/Markdown/Tests/Tokenizer/HeaderHandlerTests.cs b/cs/Markdown/Tests/Tokenizer/HeaderHandlerTests.cs new file mode 100644 index 000000000..72c144f08 --- /dev/null +++ b/cs/Markdown/Tests/Tokenizer/HeaderHandlerTests.cs @@ -0,0 +1,49 @@ +using FluentAssertions; +using Markdown.Tokenizer; +using Markdown.Tokenizer.Handlers; +using Markdown.Tokenizer.Tags; +using Markdown.TreeBuilder; +using NUnit.Framework; + +namespace 
Markdown.Tests.Tokenizer; + +/* Tokenizer-level tests for the header marker, including escaped markers and multi-line input. */ [TestFixture] +public class HeaderHandlerTests +{ + private ITokenizer tokenizer; + + /* Builds a fresh tokenizer with all three handlers before every test case. */ [SetUp] + public void SetUp() + { + var handlers = new List { new HeaderHandler(), new ItalicHandler(), new BoldHandler() }; + tokenizer = new MarkdownTokenizer(new HandlerManager(handlers), new TagProcessor()); + } + + /* Compares Value and TokenType of each produced token with the expected token at the same index. NOTE(review): TagStatus and the total token count are not asserted. */ [TestCaseSource(nameof(HeaderTokenSource))] + public void HeaderTokenizerTests((string input, Token[] tags) testCase) + { + var tokens = tokenizer.Tokenize(testCase.input).ToArray(); + + for (var i = 0; i < testCase.tags.Length; i++) + { + tokens[i].Value.Should().Be(testCase.tags[i].Value); + tokens[i].TokenType.Should().Be(testCase.tags[i].TokenType); + } + } + + /* Pairs of input text and the expected token sequence. */ private static IEnumerable<(string input, Token[] tags)> HeaderTokenSource() + { + yield return ("abc", [new TextToken("abc")]); + yield return ("# abc", [new HeaderTag(), new TextToken("abc")]); + yield return ("f# abc", [new TextToken("f#"), new TextToken(" abc")]); + yield return ("\\# abc", [new SlashToken(), new HeaderTag(), new TextToken("abc")]); + yield return ("\\\\# abc", [new SlashToken(), new SlashToken(), new HeaderTag(), new TextToken("abc")]); + yield return ("# abc\n# qwe", [ + new HeaderTag(), + new TextToken("abc"), + new NewLineToken(), + new HeaderTag(), + new TextToken("qwe") + ]); + } +} \ No newline at end of file diff --git a/cs/Markdown/Tests/Tokenizer/ItalicHandlerTests.cs b/cs/Markdown/Tests/Tokenizer/ItalicHandlerTests.cs new file mode 100644 index 000000000..eae0c758e --- /dev/null +++ b/cs/Markdown/Tests/Tokenizer/ItalicHandlerTests.cs @@ -0,0 +1,80 @@ +using FluentAssertions; +using Markdown.Tokenizer; +using Markdown.Tokenizer.Handlers; +using Markdown.Tokenizer.Tags; +using Markdown.TreeBuilder; +using NUnit.Framework; + + +namespace Markdown.Tests.Tokenizer; + +/* Tokenizer-level tests for single-underscore (italic) tags. NOTE(review): the class is named ItalicParserTests while the file is ItalicHandlerTests.cs and the sibling fixtures are named after their files. */ [TestFixture] +public class ItalicParserTests +{ + private ITokenizer tokenizer; + + /* Builds a fresh tokenizer with all three handlers before every test case. */ [SetUp] + public void SetUp() + { + var handlers = new List { new HeaderHandler(), new 
ItalicHandler(), new BoldHandler() }; + tokenizer = new MarkdownTokenizer(new HandlerManager(handlers), new TagProcessor()); + } + + /* Compares Value and TokenType of each produced token with the expected token at the same index. NOTE(review): TagStatus and the total token count are not asserted. */ [TestCaseSource(nameof(ItalicTokenSource))] + public void ItalicTokenizerTests((string input, Token[] tags) testCase) + { + var tokens = tokenizer.Tokenize(testCase.input).ToArray(); + + for (var i = 0; i < testCase.tags.Length; i++) + { + tokens[i].Value.Should().Be(testCase.tags[i].Value); + tokens[i].TokenType.Should().Be(testCase.tags[i].TokenType); + } + } + + /* Pairs of input text and the expected token sequence. */ private static IEnumerable<(string input, Token[] tags)> ItalicTokenSource() + { + yield return ("abc", [new TextToken("abc")]); + yield return ("_abc", [new ItalicTag(TagStatus.Open), new TextToken("abc")]); + yield return ("abc_", [new TextToken("abc"), new ItalicTag(TagStatus.Closed)]); + yield return ("a_bc_", [ + new TextToken("a"), + new ItalicTag(TagStatus.InWord), + new TextToken("bc"), + new ItalicTag(TagStatus.Closed) + ]); + yield return ("_a_bc", [ + new ItalicTag(TagStatus.Open), + new TextToken("a"), + new ItalicTag(TagStatus.InWord), + new TextToken("bc") + ]); + + yield return ("_a_bc_", [ + new ItalicTag(TagStatus.Open), + new TextToken("a"), + new ItalicTag(TagStatus.InWord), + new TextToken("bc"), + new ItalicTag(TagStatus.Closed) + ]); + + yield return ("_abc_", [ + new ItalicTag(TagStatus.Open), + new TextToken("abc"), + new ItalicTag(TagStatus.Closed) + ]); + + yield return ("\\_abc", [ + new SlashToken(), + new ItalicTag(TagStatus.Open), + new TextToken("abc") + ]); + + yield return ("\\\\_abc", [ + new SlashToken(), + new SlashToken(), + new ItalicTag(TagStatus.Open), + new TextToken("abc") + ]); + } +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/HandlerManager.cs b/cs/Markdown/Tokenizer/HandlerManager.cs new file mode 100644 index 000000000..acc5f8969 --- /dev/null +++ b/cs/Markdown/Tokenizer/HandlerManager.cs @@ -0,0 +1,33 @@ +using System.Text; +using Markdown.Tokenizer.Handlers; +using Markdown.Tokenizer.Tags; + +namespace 
Markdown.Tokenizer; + +/* Tries each registered handler, in registration order, against the current character of the tokenizer context. */ public class HandlerManager(IEnumerable handlers) : IHandlerManager +{ + private readonly List handlers = handlers.ToList(); + + /* The first handler that returns a token wins: buffered text is flushed into tags as a TextToken, then the token is added to tags and pushed onto tagStack. When no handler matches, the current character is appended to buffer. */ public void TryHandle(TokenizerContext context, StringBuilder buffer, List tags, Stack tagStack) + { + foreach (var handler in handlers) + { + var tag = handler.ProceedSymbol(context); + if (tag != null) + { + if (buffer.Length > 0) + { + var token = new TextToken(buffer.ToString()); + tags.Add(token); + buffer.Clear(); + } + + tags.Add(tag); + tagStack.Push(tag); + return; + } + } + + buffer.Append(context.Current); + } +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Handlers/BoldHandler.cs b/cs/Markdown/Tokenizer/Handlers/BoldHandler.cs new file mode 100644 index 000000000..e046b40d8 --- /dev/null +++ b/cs/Markdown/Tokenizer/Handlers/BoldHandler.cs @@ -0,0 +1,31 @@ +using Markdown.Tokenizer.Tags; + +namespace Markdown.Tokenizer.Handlers; + +/* Recognizes a double underscore starting at the current position. */ public class BoldHandler : IHandler +{ + public Token? ProceedSymbol(TokenizerContext ctx) + { + var symbol = ctx.Current; + + if (symbol != '_') + return null; + + if (ctx.Next != '_') + return null; + + /* Open: at the start of the text or after a space, and the character after the pair is not a space. Advance moves onto the second underscore so the caller steps past the whole pair. */ if ((ctx.Position == 0 || ctx.Previous == ' ') && ctx.NextNext != ' ') + { + ctx.Advance(); + return new BoldTag(TagStatus.Open); + } + + /* Otherwise any pair not preceded by a space is Closed. NOTE(review): Previous is null at position 0, so a pair at the very start that is followed by a space is also reported as Closed - confirm this is intended. */ if (ctx.Previous != ' ') + { + ctx.Advance(); + return new BoldTag(TagStatus.Closed); + } + + return null; + } +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Handlers/HeaderHandler.cs b/cs/Markdown/Tokenizer/Handlers/HeaderHandler.cs new file mode 100644 index 000000000..900f40deb --- /dev/null +++ b/cs/Markdown/Tokenizer/Handlers/HeaderHandler.cs @@ -0,0 +1,22 @@ +using Markdown.Tokenizer.Tags; + +namespace Markdown.Tokenizer.Handlers; + +/* Recognizes the header marker at the current position. */ public class HeaderHandler : IHandler +{ + public Token? 
ProceedSymbol(TokenizerContext ctx) + { + var symbol = ctx.Current; + + if (symbol != '#') + return null; + + /* Matches a hash followed by a space at the start of the text or right after a newline, and also any hash directly preceded by a backslash (presumably so the escape pass in TagProcessor can neutralize it). Advance skips the character after the hash. NOTE(review): the backslash branch does not check Next, yet one character is still skipped and HeaderTag.Value is always hash plus space - verify input where an escaped hash is not followed by a space. */ if ((ctx.Next == ' ' && (ctx.Previous == '\n' || ctx.Position == 0)) || (ctx.Previous == '\\')) + { + ctx.Advance(); + return new HeaderTag(); + } + + return null; + } +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Handlers/IHandler.cs b/cs/Markdown/Tokenizer/Handlers/IHandler.cs new file mode 100644 index 000000000..8ede8da56 --- /dev/null +++ b/cs/Markdown/Tokenizer/Handlers/IHandler.cs @@ -0,0 +1,8 @@ +using Markdown.Tokenizer.Tags; + +namespace Markdown.Tokenizer.Handlers; + +/* A handler inspects the context at its current position and returns a token when it recognizes its tag, or null otherwise. The handlers in this diff may advance the context past extra characters they consumed. */ public interface IHandler +{ + Token? ProceedSymbol(TokenizerContext context); +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Handlers/ItalicHandler.cs b/cs/Markdown/Tokenizer/Handlers/ItalicHandler.cs new file mode 100644 index 000000000..d1abaabdd --- /dev/null +++ b/cs/Markdown/Tokenizer/Handlers/ItalicHandler.cs @@ -0,0 +1,32 @@ +using Markdown.Tokenizer.Tags; + +namespace Markdown.Tokenizer.Handlers; + +/* Recognizes a single underscore and classifies it as Open, Closed or InWord from its neighbours. */ public class ItalicHandler : IHandler +{ + public Token? ProceedSymbol(TokenizerContext ctx) + { + var symbol = ctx.Current; + + /* An underscore followed by another underscore is not handled here; with the handler order used in this diff BoldHandler sees it next. */ if (symbol != '_' || ctx.Next == '_') + return null; + + /* An underscore next to a digit on either side is left as plain text. */ if (char.IsDigit(ctx.Previous ?? ' ') || char.IsDigit(ctx.Next ?? 
' ')) + return null; + + /* Open: at the start of the text, after a space, or after a backslash. */ if (ctx.Position == 0 || ctx.Previous == ' ' || ctx.Previous == '\\') + { + return new ItalicTag(TagStatus.Open); + } + + /* Closed: followed by a space or placed at the very end of the text. */ if (ctx.Previous != ' ' && (ctx.Next == ' ' || ctx.Length - 1 == ctx.Position)) + { + return new ItalicTag(TagStatus.Closed); + } + + /* InWord: no space on either side; TagProcessor reclassifies these later. */ if (ctx.Previous != ' ' && ctx.Next != ' ') + return new ItalicTag(TagStatus.InWord); + + /* NOTE(review): the branches above appear to cover every case, so this line looks unreachable. */ return null; + } +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/IHandlerManager.cs b/cs/Markdown/Tokenizer/IHandlerManager.cs new file mode 100644 index 000000000..accf44f2c --- /dev/null +++ b/cs/Markdown/Tokenizer/IHandlerManager.cs @@ -0,0 +1,9 @@ +using System.Text; +using Markdown.Tokenizer.Tags; + +namespace Markdown.Tokenizer; + +/* Abstraction over handler dispatch used by MarkdownTokenizer. */ public interface IHandlerManager +{ + void TryHandle(TokenizerContext context, StringBuilder buffer, List tags, Stack tagStack); +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/ITagProcessor.cs b/cs/Markdown/Tokenizer/ITagProcessor.cs new file mode 100644 index 000000000..7dd70d539 --- /dev/null +++ b/cs/Markdown/Tokenizer/ITagProcessor.cs @@ -0,0 +1,8 @@ +using Markdown.Tokenizer.Tags; + +namespace Markdown.Tokenizer; + +/* Post-processing step run by MarkdownTokenizer after scanning; receives the full token list and the stack of tag tokens. */ public interface ITagProcessor +{ + void Process(List tags, Stack tagStack); +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/ITokenizer.cs b/cs/Markdown/Tokenizer/ITokenizer.cs new file mode 100644 index 000000000..a6c3a9d42 --- /dev/null +++ b/cs/Markdown/Tokenizer/ITokenizer.cs @@ -0,0 +1,8 @@ +using Markdown.Tokenizer.Tags; + +namespace Markdown.Tokenizer; + +/* Splits text into a list of tokens. */ public interface ITokenizer +{ + List Tokenize(string text); +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/TagProcessor.cs b/cs/Markdown/Tokenizer/TagProcessor.cs new file mode 100644 index 000000000..5f6f9c68e --- /dev/null +++ b/cs/Markdown/Tokenizer/TagProcessor.cs @@ -0,0 +1,114 @@ +using Markdown.Tokenizer.Tags; + +namespace Markdown.Tokenizer; + +/* Rewrites TagStatus on the scanned tokens: first escapes, then in-word underscores, then open/close pairing. NodeFactory later reads these statuses. */ public class TagProcessor : ITagProcessor +{ + public void 
Process(List tags, Stack tagStack) + { + ProceedEscapedTags(tags); + ProceedInWordsTags(tags); + ProceedPairTags(tagStack); + } + + /* Reclassifies InWord tags by looking two tokens back and two tokens ahead: it becomes Closed when preceded by a token with status Open and then a text token; it becomes Open when followed by a text token and then a Closed tag; when followed by a text token and another InWord tag the two become an Open/Closed pair. The forward check runs after the backward one and can overwrite its result. NOTE(review): Open is the zero value of TagStatus, so tokens that never set a status also read as Open in the backward check - confirm. */ private void ProceedInWordsTags(List tags) + { + for (var i = 0; i < tags.Count; i++) + { + var current = tags[i]; + if (current.TagStatus == TagStatus.InWord) + { + if (i - 2 >= 0) + { + if (tags[i - 1].TokenType == TokenType.String + && tags[i - 2].TagStatus == TagStatus.Open) + { + current.TagStatus = TagStatus.Closed; + } + } + + if (i + 2 >= tags.Count) continue; + if (tags[i + 1].TokenType != TokenType.String) continue; + if (tags[i + 2].TagStatus == TagStatus.Closed) + { + current.TagStatus = TagStatus.Open; + } + else if (tags[i + 2].TagStatus == TagStatus.InWord) + { + current.TagStatus = TagStatus.Open; + tags[i + 2].TagStatus = TagStatus.Closed; + } + } + } + } + + /* Resolves backslashes left to right. A backslash before another backslash is Escaped (dropped from the output) and the second one becomes Broken (rendered as text and not reconsidered). A backslash before a tag token is Escaped and the tag becomes Broken. */ private void ProceedEscapedTags(List tags) + { + for (var i = 0; i < tags.Count - 1; i++) + { + var current = tags[i]; + var next = tags[i + 1]; + if (current.TokenType is TokenType.Slash && current.TagStatus != TagStatus.Broken) + { + if (next is { TokenType: TokenType.Slash }) + { + current.TagStatus = TagStatus.Escaped; + next.TagStatus = TagStatus.Broken; + } + /* Only real tag tokens can be escaped. TextToken and NewLineToken never set TagStatus and therefore report Open (the zero value), so without the TokenType filter a backslash in front of ordinary text would be marked Escaped and vanish from the output. */ else if (next is { TokenType: TokenType.Header or TokenType.Italic or TokenType.Bold, TagStatus: TagStatus.Open or TagStatus.Closed or TagStatus.Single }) + { + next.TagStatus = TagStatus.Broken; + current.TagStatus = TagStatus.Escaped; + } + } + } + } + + /* Walks the tag tokens from last to first. tempStack holds later tags that still wait for a partner; an Open tag that meets a Closed tag of the same type on top of tempStack forms a pair and both keep their status. Broken and Single tags are ignored. */ private void ProceedPairTags(Stack tagStack) + { + var tempStack = new Stack(); + + while (tagStack.Count > 0) + { + var current = tagStack.Pop(); + + if (current.TagStatus != TagStatus.Broken && current.TagStatus != TagStatus.Single) + { + if (tempStack.Count > 0) + { + var previousTag = tempStack.Peek(); + + if (previousTag.TokenType == current.TokenType) + { + if (previousTag.TagStatus == TagStatus.Closed && current.TagStatus == TagStatus.Open) + { + tempStack.Pop(); + } + else + { + tempStack.Push(current); + } + } + else + { + /* A bold tag that meets a pending italic tag is marked Broken right away. */ if (current.TokenType == TokenType.Bold && previousTag.TokenType == TokenType.Italic) 
+ { + current.TagStatus = TagStatus.Broken; + } + else + { + tempStack.Push(current); + } + } + } + else + { + tempStack.Push(current); + } + } + } + + /* Whatever is still waiting for a partner is unpaired and becomes Broken. */ while (tempStack.Count > 0) + tempStack.Pop().TagStatus = TagStatus.Broken; + } +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Tags/BoldTag.cs b/cs/Markdown/Tokenizer/Tags/BoldTag.cs new file mode 100644 index 000000000..37c8f150b --- /dev/null +++ b/cs/Markdown/Tokenizer/Tags/BoldTag.cs @@ -0,0 +1,12 @@ +namespace Markdown.Tokenizer.Tags; + +/* Token for a double underscore; Value is the literal text emitted when the tag ends up rendered as plain text. */ public class BoldTag : Token +{ + public override TokenType TokenType => TokenType.Bold; + + public BoldTag(TagStatus tagStatus) + { + Value = "__"; + TagStatus = tagStatus; + } +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Tags/HeaderTag.cs b/cs/Markdown/Tokenizer/Tags/HeaderTag.cs new file mode 100644 index 000000000..6be13050e --- /dev/null +++ b/cs/Markdown/Tokenizer/Tags/HeaderTag.cs @@ -0,0 +1,12 @@ +namespace Markdown.Tokenizer.Tags; + +/* Token for the header marker; always created with status Single. */ public class HeaderTag : Token +{ + public override TokenType TokenType => TokenType.Header; + + public HeaderTag() + { + TagStatus = TagStatus.Single; + Value = "# "; + } +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Tags/ItalicTag.cs b/cs/Markdown/Tokenizer/Tags/ItalicTag.cs new file mode 100644 index 000000000..70e251ca7 --- /dev/null +++ b/cs/Markdown/Tokenizer/Tags/ItalicTag.cs @@ -0,0 +1,12 @@ +namespace Markdown.Tokenizer.Tags; + +/* Token for a single underscore. */ public class ItalicTag : Token +{ + public override TokenType TokenType => TokenType.Italic; + + public ItalicTag(TagStatus tagStatus) + { + Value = "_"; + TagStatus = tagStatus; + } +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Tags/NewLineToken.cs b/cs/Markdown/Tokenizer/Tags/NewLineToken.cs new file mode 100644 index 000000000..273fa7684 --- /dev/null +++ b/cs/Markdown/Tokenizer/Tags/NewLineToken.cs @@ -0,0 +1,11 @@ +namespace Markdown.Tokenizer.Tags; + +/* Token for a line break. */ public class NewLineToken : Token +{ + public override TokenType 
TokenType => TokenType.NewLine; + + public NewLineToken() + { + Value = "\n"; + } +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Tags/SlashToken.cs b/cs/Markdown/Tokenizer/Tags/SlashToken.cs new file mode 100644 index 000000000..a2ddd20e6 --- /dev/null +++ b/cs/Markdown/Tokenizer/Tags/SlashToken.cs @@ -0,0 +1,11 @@ +namespace Markdown.Tokenizer.Tags; + +/* Token for a backslash. */ public class SlashToken : Token +{ + public override TokenType TokenType => TokenType.Slash; + + public SlashToken() + { + Value = "\\"; + } +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Tags/TagStatus.cs b/cs/Markdown/Tokenizer/Tags/TagStatus.cs new file mode 100644 index 000000000..525959910 --- /dev/null +++ b/cs/Markdown/Tokenizer/Tags/TagStatus.cs @@ -0,0 +1,11 @@ +namespace Markdown.Tokenizer.Tags; + +/* Role of a token as a tag. Open and Closed are pairing roles; Broken tokens are rendered as their literal Value; Escaped marks a backslash that NodeFactory skips; InWord is an underscore with no space on either side, awaiting reclassification; Single is a tag without a closing partner (the header). Open is the zero value. */ public enum TagStatus +{ + Open, + Closed, + Broken, + Escaped, + InWord, + Single +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Tags/TextToken.cs b/cs/Markdown/Tokenizer/Tags/TextToken.cs new file mode 100644 index 000000000..4a5d697d4 --- /dev/null +++ b/cs/Markdown/Tokenizer/Tags/TextToken.cs @@ -0,0 +1,11 @@ +namespace Markdown.Tokenizer.Tags; + +/* Plain text run; only Value is set, so TagStatus keeps its default. */ public class TextToken : Token +{ + public override TokenType TokenType => TokenType.String; + + public TextToken(string value) + { + Value = value; + } +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Tags/Token.cs b/cs/Markdown/Tokenizer/Tags/Token.cs new file mode 100644 index 000000000..21279f838 --- /dev/null +++ b/cs/Markdown/Tokenizer/Tags/Token.cs @@ -0,0 +1,8 @@ +namespace Markdown.Tokenizer.Tags; + +/* Base token. TagStatus defaults to Open and the base TokenType getter returns String, the zero values of their enums. NOTE(review): Value is a public mutable field while the other members are properties. */ public class Token +{ + public TagStatus TagStatus { get; set; } + public virtual TokenType TokenType { get; } + public string Value = string.Empty; +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/Tags/TokenType.cs b/cs/Markdown/Tokenizer/Tags/TokenType.cs new file mode 100644 index 000000000..fea3f46ab --- /dev/null +++ b/cs/Markdown/Tokenizer/Tags/TokenType.cs @@ -0,0 
+1,11 @@ +namespace Markdown.Tokenizer.Tags; + +/* Kind of token. String is the zero value. */ public enum TokenType +{ + String, + Header, + Italic, + Bold, + Slash, + NewLine, +} \ No newline at end of file diff --git a/cs/Markdown/Tokenizer/TokenizerContext.cs b/cs/Markdown/Tokenizer/TokenizerContext.cs new file mode 100644 index 000000000..4b3526e5e --- /dev/null +++ b/cs/Markdown/Tokenizer/TokenizerContext.cs @@ -0,0 +1,14 @@ +namespace Markdown.Tokenizer; + +/* Cursor over the input text with one character of look-behind and up to two characters of look-ahead. */ public class TokenizerContext(string text) +{ + private int position = 0; + public bool IsEnd => position >= text.Length; + /* Indexes text directly, so callers must check IsEnd first. */ public char Current => text[position]; + public int Position => position; + public int Length => text.Length; + public void Advance() => position++; + /* Previous, Next and NextNext return null when the requested position lies outside the text. */ public char? Previous => position > 0 ? text[position - 1] : null; + public char? Next => position < text.Length - 1 ? text[position + 1] : null; + public char? NextNext => position < text.Length - 2 ? text[position + 2] : null; +} \ No newline at end of file diff --git a/cs/Markdown/TreeBuilder/INodeFactory.cs b/cs/Markdown/TreeBuilder/INodeFactory.cs new file mode 100644 index 000000000..48dc44819 --- /dev/null +++ b/cs/Markdown/TreeBuilder/INodeFactory.cs @@ -0,0 +1,8 @@ +using Markdown.Tokenizer.Tags; + +namespace Markdown.TreeBuilder; + +/* Maps a token to the action TreeBuilder should perform; TreeBuilder treats a null result as plain text. */ public interface INodeFactory +{ + NodeAction? 
CreateNode(Token token); +} \ No newline at end of file diff --git a/cs/Markdown/TreeBuilder/MarkdownTokenizer.cs b/cs/Markdown/TreeBuilder/MarkdownTokenizer.cs new file mode 100644 index 000000000..b3757925d --- /dev/null +++ b/cs/Markdown/TreeBuilder/MarkdownTokenizer.cs @@ -0,0 +1,82 @@ +using System.Text; +using Markdown.Tokenizer; +using Markdown.Tokenizer.Tags; +using Token = Markdown.Tokenizer.Tags.Token; + +namespace Markdown.TreeBuilder; + +/* Scans the text character by character into tokens, then lets ITagProcessor finalize the tag statuses. NOTE(review): declared in namespace Markdown.TreeBuilder under the TreeBuilder folder although it implements ITokenizer. */ public class MarkdownTokenizer(IHandlerManager handlerManager, ITagProcessor tagProcessor) : ITokenizer +{ + /* Scratch state shared with the handler manager; reset at the start of every Tokenize call. */ private readonly StringBuilder buffer = new(); + private readonly List tags = new(); + private readonly Stack tagStack = new(); + + /* Returns the tokens of text in source order. The scratch state is cleared first and a copy of the list is returned, so repeated calls on one instance (MarkdownRenderer keeps a single tokenizer) neither accumulate tokens from earlier inputs nor alter lists that were already handed out. */ public List Tokenize(string text) + { + buffer.Clear(); + tags.Clear(); + tagStack.Clear(); + + var context = new TokenizerContext(text); + while (!context.IsEnd) + { + if (TryProceedSpecialSymbol(context)) continue; + + handlerManager.TryHandle(context, buffer, tags, tagStack); + + context.Advance(); + } + + FlushBuffer(); + + tagProcessor.Process(tags, tagStack); + + return tags.ToList(); + } + + /* Handles newline, space and backslash directly; returns true when the character was consumed, in which case the context has already been advanced. */ private bool TryProceedSpecialSymbol(TokenizerContext context) + { + switch (context.Current) + { + case '\n': + { + FlushBuffer(); + var token = new NewLineToken(); + tags.Add(token); + context.Advance(); + + return true; + } + /* A space closes the pending text run and starts a new one, so text tokens begin at a space. */ case ' ': + { + if (buffer.Length > 0) + { + tags.Add(new TextToken(buffer.ToString())); + buffer.Clear(); + } + + buffer.Append(context.Current); + context.Advance(); + + return true; + } + case '\\': + FlushBuffer(); + tags.Add(new SlashToken()); + context.Advance(); + + return true; + default: + return false; + } + } + + /* Emits the buffered characters as a TextToken, if there are any. */ private void FlushBuffer() + { + if (buffer.Length > 0) + { + tags.Add(new TextToken(buffer.ToString())); + buffer.Clear(); + } + } +} \ No newline at end of file diff --git a/cs/Markdown/TreeBuilder/NodeAction.cs b/cs/Markdown/TreeBuilder/NodeAction.cs new file mode 100644 index 000000000..48f27395f --- /dev/null +++ b/cs/Markdown/TreeBuilder/NodeAction.cs @@ -0,0 +1,20 @@ +using Markdown.TreeBuilder.Nodes; 
+ +namespace Markdown.TreeBuilder; + +/* Instruction for TreeBuilder describing what to do with a token: open a new node, close the current node, or skip the token. */ public abstract class NodeAction +{ + /* Carries the node that TreeBuilder appends and descends into. */ public class OpenNode : NodeAction + { + public Node Node { get; } + public OpenNode(Node node) => Node = node; + } + + public class CloseNode : NodeAction + { + } + + public class SkipNode : NodeAction + { + } +} \ No newline at end of file diff --git a/cs/Markdown/TreeBuilder/NodeFactory.cs b/cs/Markdown/TreeBuilder/NodeFactory.cs new file mode 100644 index 000000000..1c16920ca --- /dev/null +++ b/cs/Markdown/TreeBuilder/NodeFactory.cs @@ -0,0 +1,23 @@ +using Markdown.Tokenizer.Tags; +using Markdown.TreeBuilder.Nodes; + +namespace Markdown.TreeBuilder; + +/* Default INodeFactory: decides from the token type and its TagStatus. */ public class NodeFactory : INodeFactory +{ + /* Returns null for tokens that TreeBuilder should emit as plain text. Arms are evaluated top to bottom, so Broken wins over every type-specific arm. NOTE(review): a NewLineToken is skipped, so the newline itself never reaches the output and nothing closes a HeaderNode opened earlier on that line - confirm multi-line input is handled as intended. */ public NodeAction? CreateNode(Token token) + { + return token switch + { + { TagStatus: TagStatus.Broken } => null, + ItalicTag { TagStatus: TagStatus.Open } => new NodeAction.OpenNode(new ItalicNode()), + ItalicTag { TagStatus: TagStatus.Closed } => new NodeAction.CloseNode(), + BoldTag { TagStatus: TagStatus.Open } => new NodeAction.OpenNode(new BoldNode()), + BoldTag { TagStatus: TagStatus.Closed } => new NodeAction.CloseNode(), + SlashToken { TagStatus: TagStatus.Escaped } => new NodeAction.SkipNode(), + HeaderTag => new NodeAction.OpenNode(new HeaderNode()), + NewLineToken => new NodeAction.SkipNode(), + _ => null + }; + } +} \ No newline at end of file diff --git a/cs/Markdown/TreeBuilder/Nodes/BoldNode.cs b/cs/Markdown/TreeBuilder/Nodes/BoldNode.cs new file mode 100644 index 000000000..c83686c86 --- /dev/null +++ b/cs/Markdown/TreeBuilder/Nodes/BoldNode.cs @@ -0,0 +1,7 @@ +namespace Markdown.TreeBuilder.Nodes; + +/* Node for bold text. NOTE(review): both tags are empty strings in this listing; presumably the markup was lost in extraction - confirm against the real file. */ public class BoldNode : Node +{ + public override string OpenTag => ""; + public override string CloseTag => ""; +} \ No newline at end of file diff --git a/cs/Markdown/TreeBuilder/Nodes/HeaderNode.cs b/cs/Markdown/TreeBuilder/Nodes/HeaderNode.cs new file mode 100644 index 000000000..f7f608ef5 --- /dev/null +++ b/cs/Markdown/TreeBuilder/Nodes/HeaderNode.cs @@ -0,0 +1,7 @@ 
+namespace Markdown.TreeBuilder.Nodes; + +/* Node for a header line; its OpenTag and CloseTag wrap the rendered children. NOTE(review): the tag literals of the node classes in this listing are empty or broken across lines - presumably the markup was lost in extraction; confirm against the real files. The classes that follow are ItalicNode, MainNode (the root created by TreeBuilder, no tags), Node (abstract base holding Children, Parent and the two tag strings) and TextNode (leaf carrying Value). */ public class HeaderNode : Node +{ + public override string OpenTag => "

"; + public override string CloseTag => "

"; +} \ No newline at end of file diff --git a/cs/Markdown/TreeBuilder/Nodes/ItalicNode.cs b/cs/Markdown/TreeBuilder/Nodes/ItalicNode.cs new file mode 100644 index 000000000..fb8caa564 --- /dev/null +++ b/cs/Markdown/TreeBuilder/Nodes/ItalicNode.cs @@ -0,0 +1,7 @@ +namespace Markdown.TreeBuilder.Nodes; + +public class ItalicNode : Node +{ + public override string OpenTag => ""; + public override string CloseTag => ""; +} \ No newline at end of file diff --git a/cs/Markdown/TreeBuilder/Nodes/MainNode.cs b/cs/Markdown/TreeBuilder/Nodes/MainNode.cs new file mode 100644 index 000000000..c6d4b4869 --- /dev/null +++ b/cs/Markdown/TreeBuilder/Nodes/MainNode.cs @@ -0,0 +1,5 @@ +namespace Markdown.TreeBuilder.Nodes; + +public class MainNode : Node +{ +} \ No newline at end of file diff --git a/cs/Markdown/TreeBuilder/Nodes/Node.cs b/cs/Markdown/TreeBuilder/Nodes/Node.cs new file mode 100644 index 000000000..cafc9062c --- /dev/null +++ b/cs/Markdown/TreeBuilder/Nodes/Node.cs @@ -0,0 +1,10 @@ +namespace Markdown.TreeBuilder.Nodes; + +public abstract class Node +{ + public List Children { get; } = new(); + public Node? Parent { get; set; } + + public virtual string OpenTag => string.Empty; + public virtual string CloseTag => string.Empty; +} \ No newline at end of file diff --git a/cs/Markdown/TreeBuilder/Nodes/TextNode.cs b/cs/Markdown/TreeBuilder/Nodes/TextNode.cs new file mode 100644 index 000000000..62a4c96b2 --- /dev/null +++ b/cs/Markdown/TreeBuilder/Nodes/TextNode.cs @@ -0,0 +1,6 @@ +namespace Markdown.TreeBuilder.Nodes; + +public class TextNode : Node +{ + public string? 
Value { get; init; } = string.Empty; +} \ No newline at end of file diff --git a/cs/Markdown/TreeBuilder/TreeBuilder.cs b/cs/Markdown/TreeBuilder/TreeBuilder.cs new file mode 100644 index 000000000..f5fbbb44d --- /dev/null +++ b/cs/Markdown/TreeBuilder/TreeBuilder.cs @@ -0,0 +1,42 @@ +using Markdown.Tokenizer.Tags; +using Markdown.TreeBuilder.Nodes; + +namespace Markdown.TreeBuilder; + +/* Builds the node tree from the token list in a single pass. */ public class TreeBuilder(INodeFactory nodeFactory) +{ + /* currentNode is the node that receives new children. A null action appends the token Value as a TextNode; OpenNode appends the new node and descends into it; CloseNode moves back to the parent (or stays put at the root); SkipNode drops the token. Returns the root MainNode. NOTE(review): CloseNode does not check that the node being closed matches the type of the closing token - confirm the tag processor guarantees proper nesting. */ public Node Build(List tokens) + { + Node mainNode = new MainNode(); + var currentNode = mainNode; + + foreach (var token in tokens) + { + var nodeAction = nodeFactory.CreateNode(token); + + if (nodeAction == null) + { + currentNode.Children.Add(new TextNode { Value = token.Value }); + continue; + } + + switch (nodeAction) + { + case NodeAction.OpenNode openNode: + currentNode.Children.Add(openNode.Node); + openNode.Node.Parent = currentNode; + currentNode = openNode.Node; + break; + + case NodeAction.CloseNode: + currentNode = currentNode.Parent ?? currentNode; + break; + + case NodeAction.SkipNode: + break; + } + } + + return mainNode; + } +} \ No newline at end of file diff --git a/cs/NewMarkdown/NewMarkdown.csproj b/cs/NewMarkdown/NewMarkdown.csproj new file mode 100644 index 000000000..c6cecb9b6 --- /dev/null +++ b/cs/NewMarkdown/NewMarkdown.csproj @@ -0,0 +1,16 @@ + + + + net8.0 + enable + enable + + + + + + + + + + diff --git a/cs/NewMarkdown/Node.cs b/cs/NewMarkdown/Node.cs new file mode 100644 index 000000000..9d710792f --- /dev/null +++ b/cs/NewMarkdown/Node.cs @@ -0,0 +1,14 @@ +namespace NewMarkdown; + +/* NOTE(review): the NewMarkdown files in the rest of this diff look like an unfinished draft; the solution hunk at the end only adds Markdown.csproj. Children is not assigned by the constructor and stays null until a caller sets it. */ public class Node +{ + public string? Text { get; set; } + public NodeType Type { get; set; } + public List? Children { get; set; } + + public Node(NodeType type, string? 
text = null) + { + Type = type; + Text = text; + } +} \ No newline at end of file diff --git a/cs/NewMarkdown/NodeType.cs b/cs/NewMarkdown/NodeType.cs new file mode 100644 index 000000000..3d0217835 --- /dev/null +++ b/cs/NewMarkdown/NodeType.cs @@ -0,0 +1,9 @@ +namespace NewMarkdown; + +/* Kind of node in the draft model. */ public enum NodeType +{ + Header, + Italic, + Bold, + Text +} \ No newline at end of file diff --git a/cs/NewMarkdown/Tag.cs b/cs/NewMarkdown/Tag.cs new file mode 100644 index 000000000..14a22f5bf --- /dev/null +++ b/cs/NewMarkdown/Tag.cs @@ -0,0 +1,6 @@ +namespace NewMarkdown; + +/* Empty placeholder class. */ public class Tag +{ + +} \ No newline at end of file diff --git a/cs/NewMarkdown/TextReader.cs b/cs/NewMarkdown/TextReader.cs new file mode 100644 index 000000000..2706733d5 --- /dev/null +++ b/cs/NewMarkdown/TextReader.cs @@ -0,0 +1,17 @@ +namespace NewMarkdown.Lexer; + + +/* Forward-only cursor over a string. Current indexes text directly, so check IsEnd first. */ public class TextReader +{ + private string text; + private int position; + + public TextReader(string text) + { + this.text = text; + } + + public bool IsEnd => position >= text.Length; + public void MoveNext(int step = 1) => position += step; + public char Current => text[position]; +} \ No newline at end of file diff --git a/cs/NewMarkdown/Tokenizer.cs b/cs/NewMarkdown/Tokenizer.cs new file mode 100644 index 000000000..fcc4e7ab7 --- /dev/null +++ b/cs/NewMarkdown/Tokenizer.cs @@ -0,0 +1,30 @@ +using System.Text; +using TextReader = NewMarkdown.Lexer.TextReader; + +namespace NewMarkdown; + +/* NOTE(review): unfinished. result is an instance field, so it would accumulate across calls; buffer is never appended to; the loop in Tokenize never calls MoveNext; TryParseHeader compares the char Current with a string literal and ends in an incomplete member access, so this file cannot compile as shown. */ public class Tokenizer +{ + private readonly List result = new List(); + public Tokenizer() + { } + + public List Tokenize(string text) + { + var reader = new TextReader(text); + var buffer = new StringBuilder(); + while (!reader.IsEnd) + { + TryParseHeader(reader); + } + result.Add(new Node(NodeType.Text, buffer.ToString())); + return result; + } + + // If a slash is encountered, check whether it escapes something + + private void TryParseHeader(TextReader reader) + { + if(reader.Current == "#" && reader.) 
+ } +} \ No newline at end of file diff --git a/cs/NewMarkdown/TokenizerTest.cs b/cs/NewMarkdown/TokenizerTest.cs new file mode 100644 index 000000000..2395f8cf9 --- /dev/null +++ b/cs/NewMarkdown/TokenizerTest.cs @@ -0,0 +1,17 @@ +namespace NewMarkdown; + +/* NOTE(review): prints the nodes without asserting anything, and no using for NUnit.Framework is visible in this file - confirm a global using exists. */ [TestFixture] +public class TokenizerTest +{ + [Test] + public void Test() + { + var tokenizer = new Tokenizer(); + var result = tokenizer.Tokenize("This is a sample text."); + + foreach (var node in result) + { + Console.WriteLine($"{node.Type}: {node.Text}"); + } + } +} \ No newline at end of file diff --git a/cs/clean-code.sln b/cs/clean-code.sln index 2206d54db..253798549 100644 --- a/cs/clean-code.sln +++ b/cs/clean-code.sln @@ -9,6 +9,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ControlDigit", "ControlDigi EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Samples", "Samples\Samples.csproj", "{C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Markdown", "Markdown\Markdown.csproj", "{BB96272E-4341-4D87-9C3F-98CCFBFF7332}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -27,5 +29,9 @@ Global {C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}.Debug|Any CPU.Build.0 = Debug|Any CPU {C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}.Release|Any CPU.ActiveCfg = Release|Any CPU {C3EF41D7-50EF-4CE1-B30A-D1D81C93D7FA}.Release|Any CPU.Build.0 = Release|Any CPU + {BB96272E-4341-4D87-9C3F-98CCFBFF7332}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {BB96272E-4341-4D87-9C3F-98CCFBFF7332}.Debug|Any CPU.Build.0 = Debug|Any CPU + {BB96272E-4341-4D87-9C3F-98CCFBFF7332}.Release|Any CPU.ActiveCfg = Release|Any CPU + {BB96272E-4341-4D87-9C3F-98CCFBFF7332}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection EndGlobal