-
Notifications
You must be signed in to change notification settings - Fork 300
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Минеев Максим #243
base: master
Are you sure you want to change the base?
Минеев Максим #243
Changes from 5 commits
e93a708
c47ccc8
4452683
f01e8e5
d6a5ead
d932240
4490d82
cdc67f4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -239,3 +239,4 @@ _Pvt_Extensions | |
**/.idea | ||
**/.vscode | ||
**/node_modules | ||
/cs/Markdown/Markdown.csproj.DotSettings |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
using Markdown.Enums; | ||
|
||
namespace Markdown.AstNodes; | ||
|
||
/// <summary>
/// Syntax-tree node of kind <see cref="MarkdownNodeName.Bold"/>; nested nodes are kept in <see cref="Children"/>.
/// </summary>
public class BoldMarkdownNode : MarkdownNode, IMarkdownNodeWithChildren
{
    /// <summary>Nodes nested directly inside this node. Starts empty.</summary>
    public List<MarkdownNode> Children { get; } = new();

    /// <summary>Always <see cref="MarkdownNodeName.Bold"/>.</summary>
    public override MarkdownNodeName Type
    {
        get { return MarkdownNodeName.Bold; }
    }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
using Markdown.Enums; | ||
|
||
namespace Markdown.AstNodes; | ||
|
||
/// <summary>
/// Syntax-tree node of kind <see cref="MarkdownNodeName.Heading"/>; nested nodes are kept in <see cref="Children"/>.
/// </summary>
public class HeadingMarkdownNode : MarkdownNode, IMarkdownNodeWithChildren
{
    /// <summary>Nodes nested directly inside this node. Starts empty.</summary>
    public List<MarkdownNode> Children { get; } = new();

    /// <summary>Always <see cref="MarkdownNodeName.Heading"/>.</summary>
    public override MarkdownNodeName Type
    {
        get { return MarkdownNodeName.Heading; }
    }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
namespace Markdown.AstNodes; | ||
|
||
/// <summary>
/// Implemented by syntax-tree nodes that can contain other nodes.
/// </summary>
public interface IMarkdownNodeWithChildren
{
    /// <summary>The nodes nested directly inside this node.</summary>
    List<MarkdownNode> Children { get; }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
using Markdown.Enums; | ||
|
||
namespace Markdown.AstNodes; | ||
|
||
/// <summary>
/// Syntax-tree node of kind <see cref="MarkdownNodeName.Italic"/>; nested nodes are kept in <see cref="Children"/>.
/// </summary>
public class ItalicMarkdownNode : MarkdownNode, IMarkdownNodeWithChildren
{
    /// <summary>Nodes nested directly inside this node. Starts empty.</summary>
    public List<MarkdownNode> Children { get; } = new();

    /// <summary>Always <see cref="MarkdownNodeName.Italic"/>.</summary>
    public override MarkdownNodeName Type
    {
        get { return MarkdownNodeName.Italic; }
    }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
using Markdown.Enums; | ||
|
||
namespace Markdown.AstNodes; | ||
|
||
/// <summary>
/// Base class of every syntax-tree node.
/// </summary>
/// <remarks>
/// Equality is structural. Two child-bearing nodes are equal when they have the same runtime type and
/// pairwise-equal children; two text nodes are equal when their content is equal. Any other node is
/// equal only to itself. <see cref="GetHashCode"/> follows the same rules, so the hash of a child-bearing
/// node changes when its children change: do not mutate a node while it is a key in a hash-based collection.
/// </remarks>
public abstract class MarkdownNode
{
    /// <summary>The kind of this node.</summary>
    public abstract MarkdownNodeName Type { get; }

    /// <summary>Structural equality; see the remarks on the class.</summary>
    /// <param name="obj">The object to compare with; may be <c>null</c>.</param>
    /// <returns><c>true</c> when <paramref name="obj"/> is structurally equal to this node.</returns>
    public override bool Equals(object? obj)
    {
        // Equals must be reflexive, including for node kinds not covered by the branches below.
        if (ReferenceEquals(this, obj))
            return true;

        if (this is IMarkdownNodeWithChildren node && obj is IMarkdownNodeWithChildren other)
            return GetType() == other.GetType() && node.Children.SequenceEqual(other.Children);

        if (this is TextMarkdownNode valueNode && obj is TextMarkdownNode otherValueNode)
            return valueNode.Content.Equals(otherValueNode.Content);

        return false;
    }

    /// <summary>Hash code consistent with <see cref="Equals(object?)"/>.</summary>
    /// <returns>A hash built from exactly the data that <see cref="Equals(object?)"/> compares.</returns>
    public override int GetHashCode()
    {
        switch (this)
        {
            case IMarkdownNodeWithChildren parent:
            {
                // Mirrors the first Equals branch: runtime type plus the children in order.
                var hash = new HashCode();
                hash.Add(GetType());
                foreach (var child in parent.Children)
                    hash.Add(child);
                return hash.ToHashCode();
            }
            case TextMarkdownNode text:
                // Mirrors the second Equals branch: content only, the runtime type is not compared there.
                return HashCode.Combine(text.Content);
            default:
                // Such nodes are equal only by reference, so the default identity hash is the right one.
                return base.GetHashCode();
        }
    }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
using Markdown.Enums; | ||
|
||
namespace Markdown.AstNodes; | ||
|
||
/// <summary>
/// Syntax-tree node of kind <see cref="MarkdownNodeName.Root"/>; nested nodes are kept in <see cref="Children"/>.
/// </summary>
public class RootMarkdownNode : MarkdownNode, IMarkdownNodeWithChildren
{
    /// <summary>Nodes nested directly inside this node. Starts empty.</summary>
    public List<MarkdownNode> Children { get; } = new();

    /// <summary>Always <see cref="MarkdownNodeName.Root"/>.</summary>
    public override MarkdownNodeName Type
    {
        get { return MarkdownNodeName.Root; }
    }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
using Markdown.Enums; | ||
|
||
namespace Markdown.AstNodes; | ||
|
||
/// <summary>
/// Syntax-tree node of kind <see cref="MarkdownNodeName.Text"/> that carries a string and has no children.
/// </summary>
/// <param name="content">The text exposed through <see cref="Content"/>.</param>
public class TextMarkdownNode(string content) : MarkdownNode
{
    /// <summary>The text passed to the constructor.</summary>
    public string Content
    {
        get { return content; }
    }

    /// <summary>Always <see cref="MarkdownNodeName.Text"/>.</summary>
    public override MarkdownNodeName Type
    {
        get { return MarkdownNodeName.Text; }
    }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
namespace Markdown.Enums; | ||
|
||
/// <summary>
/// Kinds of syntax-tree nodes. Numeric values are spelled out so that reordering members cannot change them.
/// </summary>
public enum MarkdownNodeName
{
    Bold = 0,
    Italic = 1,
    Heading = 2,
    Text = 3,
    Root = 4,
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
namespace Markdown.Enums; | ||
|
||
/// <summary>
/// Kinds of tokens. Numeric values are spelled out so that reordering members cannot change them.
/// </summary>
public enum MarkdownTokenName
{
    Italic = 0,
    Bold = 1,
    Heading = 2,
    Text = 3,
    NewLine = 4,
    Space = 5,
    Number = 6,
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
namespace Markdown; | ||
|
||
/// <summary>
/// Converts input text into a list of tokens.
/// </summary>
public interface ILexer
{
    /// <summary>Tokenizes <paramref name="input"/>.</summary>
    /// <param name="input">The text to split into tokens.</param>
    /// <returns>The tokens produced for <paramref name="input"/>.</returns>
    List<IToken> Tokenize(string input);
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
using Markdown.AstNodes; | ||
|
||
namespace Markdown; | ||
|
||
/// <summary>
/// Builds a syntax tree from a list of tokens.
/// </summary>
public interface IParser
{
    /// <summary>Parses <paramref name="tokens"/> into a tree.</summary>
    /// <param name="tokens">The tokens to parse.</param>
    /// <returns>The root node of the resulting tree.</returns>
    RootMarkdownNode Parse(List<IToken> tokens);
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
using Markdown.Enums; | ||
|
||
namespace Markdown; | ||
|
||
/// <summary>
/// A single token.
/// </summary>
public interface IToken
{
    /// <summary>The kind of this token.</summary>
    MarkdownTokenName Name { get; }

    /// <summary>The string carried by the token.</summary>
    string Value { get; }

    /// <summary>
    /// NOTE(review): presumably the index in the input at which the token starts; confirm against the implementations.
    /// </summary>
    int Position { get; }

    /// <summary>
    /// NOTE(review): presumably the number of input characters the token spans; confirm against the implementations.
    /// </summary>
    int Length { get; }

    /// <summary>
    /// NOTE(review): presumably reports whether <see cref="Name"/> equals <paramref name="type"/>; confirm against the implementations.
    /// </summary>
    /// <param name="type">The token kind to test for.</param>
    bool Is(MarkdownTokenName type);

    /// <summary>
    /// NOTE(review): exact meaning is defined by the implementations, which are not visible here; the name
    /// suggests the input index at which the following token begins.
    /// </summary>
    int GetIndexToNextToken();
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
<Project Sdk="Microsoft.NET.Sdk"> | ||
|
||
<PropertyGroup> | ||
<OutputType>Exe</OutputType> | ||
<TargetFramework>net8.0</TargetFramework> | ||
<ImplicitUsings>enable</ImplicitUsings> | ||
<Nullable>enable</Nullable> | ||
</PropertyGroup> | ||
|
||
</Project> |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,216 @@ | ||
using System.Text; | ||
using Markdown.Tokens; | ||
|
||
namespace Markdown; | ||
|
||
public class MarkdownLexer : ILexer | ||
{ | ||
private int position; | ||
private readonly List<IToken> tokens = []; | ||
private const string DoubleGround = "__"; | ||
private const string Ground = "_"; | ||
private const string Escape = "\\"; | ||
private const char GroundChar = '_'; | ||
private const char SharpChar = '#'; | ||
private const char EscapeChar = '\\'; | ||
private const char NewLineChar = '\n'; | ||
private const char SpaceChar = ' '; | ||
private readonly char[] escapedChars = [SharpChar, GroundChar, EscapeChar, NewLineChar]; | ||
|
||
/// <summary>
/// Splits <paramref name="input"/> into tokens by dispatching on the character at the current position.
/// </summary>
/// <param name="input">The text to tokenize.</param>
/// <returns>
/// A new list with the tokens produced for this call only. The lexer keeps no reference to it, so later
/// calls cannot change it.
/// </returns>
/// <remarks>
/// NOTE(review): a reviewer proposed making <c>position</c> a local so that it is handled the same way as
/// <c>nestingStack</c> and <c>input</c>. The author preferred to keep it a field, and suggested promoting
/// the stack to a field instead; <c>input</c> has to stay a parameter because of the <see cref="ILexer"/> contract.
/// </remarks>
public List<IToken> Tokenize(string input)
{
    // Reset ALL per-call state. Without clearing the token buffer, a second call on the same
    // lexer instance would also return the tokens of the previous call.
    position = 0;
    tokens.Clear();
    var nestingStack = new Stack<string>();

    while (position < input.Length)
    {
        // Every branch consumes at least one character, so the loop always terminates.
        switch (input[position])
        {
            case SpaceChar:
                ParseSpaceAndAdvance();
                break;
            case NewLineChar:
                ParseNewLineAndAdvance(nestingStack);
                break;
            case EscapeChar:
                ParseEscapeAndAdvance(input);
                break;
            case GroundChar:
                ParseItalicOrBoldAndAdvance(input, nestingStack);
                break;
            case SharpChar:
                ParseHeadingAndAdvance(input);
                break;
            default:
                ParseTextAndAdvance(input);
                break;
        }
    }

    // Hand out a copy: the internal buffer is cleared at the start of the next call.
    return new List<IToken>(tokens);
}
|
||
/// <summary>Adds a <see cref="SpaceToken"/> for the current position and moves one character forward.</summary>
private void ParseSpaceAndAdvance()
{
    var spaceStart = position++;
    tokens.Add(new SpaceToken(spaceStart));
}
|
||
/// <summary>
/// Handles a <c>#</c> at the current position: emits a <see cref="HeadingToken"/> when it opens a
/// paragraph and is followed by a space, otherwise emits it as plain text.
/// </summary>
/// <param name="input">The text being tokenized.</param>
private void ParseHeadingAndAdvance(string input)
{
    if (NextIsSpace(input) && IsStartOfParagraph(input))
    {
        tokens.Add(new HeadingToken(position));
        // Skip both the marker and the space that follows it.
        position += 2;
        return;
    }

    // Not a heading marker: keep the character as text, using the shared constant instead of a literal.
    tokens.Add(new TextToken(position, SharpChar.ToString()));
    position++;
}
|
||
/// <summary>
/// Consumes a run of ordinary characters as one <see cref="TextToken"/>. The run ends at a space,
/// newline, escape, underscore, sharp or digit; when it ends at a digit, the number that follows is
/// tokenized right away.
/// </summary>
/// <param name="input">The text being tokenized.</param>
private void ParseTextAndAdvance(string input)
{
    var textStart = position;
    while (position < input.Length)
    {
        var isDelimiter = input[position] is SharpChar or GroundChar or NewLineChar or EscapeChar or SpaceChar;
        if (isDelimiter || CurrentIsDigit(input))
            break;
        position++;
    }

    // The run may be empty when the very first character is a digit.
    if (position > textStart)
        tokens.Add(new TextToken(textStart, input[textStart..position]));

    if (position < input.Length && CurrentIsDigit(input))
        ParseNumberAndAdvance(input);
}
|
||
|
||
/// <summary>
/// Consumes a run of digits and underscores starting at the current position and adds it as one
/// <see cref="NumberToken"/>.
/// </summary>
/// <param name="input">The text being tokenized.</param>
private void ParseNumberAndAdvance(string input)
{
    var numberStart = position;
    while (position < input.Length && (input[position] == GroundChar || CurrentIsDigit(input)))
        position++;

    tokens.Add(new NumberToken(numberStart, input[numberStart..position]));
}
|
||
/// <summary>
/// Handles an underscore at the current position. Classifies the run as one, two, or three-or-more
/// underscores and delegates to the handler for the current nesting depth (0, 1 or 2 open markers).
/// </summary>
/// <param name="input">The text being tokenized.</param>
/// <param name="stack">The currently open emphasis markers, innermost on top.</param>
private void ParseItalicOrBoldAndAdvance(string input, Stack<string> stack)
{
    // isDoubleGround is also true for a triple run: it only checks the single next character.
    var isTripleGround = NextIsDoubleGround(input);
    var isDoubleGround = NextIsGround(input);
    var isSingleGround = !(isDoubleGround || isTripleGround);

    switch (stack.Count)
    {
        case 0:
            ParseItalicOrBoldAndAdvanceWhenStackEmpty(isSingleGround, isTripleGround, stack);
            break;
        case 1:
            ParseItalicOrBoldAndAdvanceWhenStackHasOne(isSingleGround, isDoubleGround, isTripleGround, stack);
            break;
        case 2:
            ParseItalicOrBoldAndAdvanceWhenStackHasTwo(isSingleGround, isTripleGround, stack);
            break;
    }
}
|
||
/// <summary>
/// Underscore handler for the case where no emphasis marker is open. A single underscore opens italic,
/// a double opens bold, a triple opens bold and then italic.
/// </summary>
/// <param name="isSingleGround">The run is exactly one underscore.</param>
/// <param name="isTripleGround">The run is at least three underscores.</param>
/// <param name="stack">The open-marker stack; opened markers are pushed onto it.</param>
private void ParseItalicOrBoldAndAdvanceWhenStackEmpty(bool isSingleGround, bool isTripleGround,
    Stack<string> stack)
{
    if (!isSingleGround)
    {
        ParseBoldAndAdvance();
        stack.Push(DoubleGround);

        // A plain double run opens bold only.
        if (!isTripleGround)
            return;
    }

    // Reached for a single run, and for the third underscore of a triple run.
    ParseItalicAndAdvance();
    stack.Push(Ground);
}
|
||
/// <summary>
/// Underscore handler for the case where exactly one emphasis marker is open.
/// </summary>
/// <param name="isSingleGround">The run is exactly one underscore.</param>
/// <param name="isDoubleGround">The run is at least two underscores.</param>
/// <param name="isTripleGround">The run is at least three underscores.</param>
/// <param name="stack">The open-marker stack; its top decides how the run is interpreted.</param>
/// <remarks>
/// NOTE(review): reviewers pointed out that the push-then-maybe-pop approach makes the per-depth handlers
/// complex. They suggested pushing single-underscore chars instead of marker strings, and passing the
/// underscore count as an int instead of boolean flags. The author explained that the lexer must resolve
/// nesting at least partially so the parser receives a consistent token list, and that these handlers
/// were split out of one much longer method.
/// </remarks>
private void ParseItalicOrBoldAndAdvanceWhenStackHasOne(bool isSingleGround, bool isDoubleGround,
    bool isTripleGround,
    Stack<string> stack)
{
    var openMarker = stack.Peek();

    if (openMarker == DoubleGround)
    {
        if (isSingleGround)
        {
            // Italic opens inside the open bold.
            ParseItalicAndAdvance();
            stack.Push(Ground);
            return;
        }

        if (isTripleGround)
            ParseItalicAndAdvance();
        ParseBoldAndAdvance();
        stack.Pop();
        return;
    }

    if (openMarker != Ground)
        return;

    if (isTripleGround)
    {
        ParseBoldAndAdvance();
        ParseItalicAndAdvance();
    }
    else if (isDoubleGround)
    {
        // A double run while italic is open is emitted as literal text.
        tokens.Add(new TextToken(position, DoubleGround));
        position += 2;
    }
    else
    {
        ParseItalicAndAdvance();
    }

    stack.Pop();
}
|
||
/// <summary>
/// Underscore handler for the case where two emphasis markers are open. A single underscore emits an
/// italic token and pops one marker; a longer run emits an italic token (triple only) and a bold token,
/// then pops both markers.
/// </summary>
/// <param name="isSingleGround">The run is exactly one underscore.</param>
/// <param name="isTripleGround">The run is at least three underscores.</param>
/// <param name="stack">The open-marker stack.</param>
private void ParseItalicOrBoldAndAdvanceWhenStackHasTwo(bool isSingleGround, bool isTripleGround,
    Stack<string> stack)
{
    if (isSingleGround)
    {
        ParseItalicAndAdvance();
        stack.Pop();
    }
    else
    {
        if (isTripleGround)
            ParseItalicAndAdvance();
        ParseBoldAndAdvance();

        // Both open markers are removed.
        stack.Pop();
        stack.Pop();
    }
}
|
||
/// <summary>Adds a <see cref="BoldToken"/> for the current position and moves two characters forward.</summary>
private void ParseBoldAndAdvance()
{
    var boldToken = new BoldToken(position);
    tokens.Add(boldToken);
    position += DoubleGround.Length;
}
|
||
/// <summary>Adds an <see cref="ItalicToken"/> for the current position and moves one character forward.</summary>
private void ParseItalicAndAdvance()
{
    var italicToken = new ItalicToken(position);
    tokens.Add(italicToken);
    position += Ground.Length;
}
|
||
/// <summary>
/// Adds a <see cref="NewLineToken"/> for the current position, discards every open emphasis marker and
/// moves one character forward.
/// </summary>
/// <param name="stack">The open-marker stack; it is emptied.</param>
private void ParseNewLineAndAdvance(Stack<string> stack)
{
    var newLineToken = new NewLineToken(position);
    tokens.Add(newLineToken);

    // Open markers do not carry over the line break.
    stack.Clear();
    position += 1;
}
|
||
/// <summary>
/// Handles a backslash at the current position and emits exactly one <see cref="TextToken"/>:
/// the backslash itself when it is the last character; <c>__</c> when a double underscore follows;
/// the next character alone when it is escapable; otherwise the backslash together with the next character.
/// </summary>
/// <param name="input">The text being tokenized.</param>
private void ParseEscapeAndAdvance(string input)
{
    var escapeStart = position;

    var isLastChar = escapeStart + 1 >= input.Length;
    if (isLastChar)
    {
        tokens.Add(new TextToken(escapeStart, Escape));
        position++;
        return;
    }

    if (NextIsDoubleGround(input))
    {
        tokens.Add(new TextToken(escapeStart, DoubleGround));
        // Backslash plus both underscores.
        position += 3;
        return;
    }

    var escaped = input[escapeStart + 1];
    var text = escapedChars.Contains(escaped)
        ? escaped.ToString()
        : Escape + escaped;
    tokens.Add(new TextToken(escapeStart, text));
    position += 2;
}
|
||
/// <summary>
/// Reports whether the two characters after the current position both exist and are underscores.
/// </summary>
/// <param name="input">The text being tokenized.</param>
/// <remarks>
/// NOTE(review): a reviewer suggested wrapping the input in a value object and moving these look-ahead
/// helpers onto it.
/// </remarks>
private bool NextIsDoubleGround(string input)
{
    if (position + 2 >= input.Length)
        return false;

    return input[position + 1] == GroundChar && input[position + 2] == GroundChar;
}
|
||
/// <summary>Reports whether the character after the current position exists and is a space.</summary>
private bool NextIsSpace(string input)
{
    var next = position + 1;
    return next < input.Length && input[next] == SpaceChar;
}
/// <summary>Reports whether the character after the current position exists and is an underscore.</summary>
private bool NextIsGround(string input)
{
    var next = position + 1;
    return next < input.Length && input[next] == GroundChar;
}
/// <summary>
/// Reports whether the character at the current position is a digit. The caller must ensure the
/// position is inside <paramref name="input"/>; no bounds check is done here.
/// </summary>
private bool CurrentIsDigit(string input)
{
    var current = input[position];
    return char.IsDigit(current);
}
|
||
/// <summary>
/// Reports whether the current position is the first character of the input or directly follows a newline.
/// </summary>
private bool IsStartOfParagraph(string input)
{
    if (position == 0)
        return true;

    return input[position - 1] == NewLineChar;
}
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Заметила, что некоторые символы инициализированы дважды (например, DoubleGround в MarkdownLexer и в MarkdownParser). Исходя из этого, предлагаю вынести эти константы в отдельный класс и использовать его везде.