From b7521d444d33517bbcbe033b183f3324f57dcd18 Mon Sep 17 00:00:00 2001 From: "Axel H." Date: Tue, 7 Jan 2025 23:28:35 +0100 Subject: [PATCH 1/2] fix(code-blocks): Fixes multiple language parsing cases (tilde, spaces before/after, directives...) Fixes #234 --- lua/markview/parser.lua | 53 ++++++++++++++++++++++++++++++----------- 1 file changed, 39 insertions(+), 14 deletions(-) diff --git a/lua/markview/parser.lua b/lua/markview/parser.lua index e3b9ed7..40fd1d1 100644 --- a/lua/markview/parser.lua +++ b/lua/markview/parser.lua @@ -34,6 +34,18 @@ parser.escape_string = function (input) return input; end + +--- Extract fenced coode block header +--- return a 2-tuple (fence, infostring) +parser.get_fence = function(line) + for _, pattern in pairs({"```+", "~~~+"}) do + local fence, info = line:match("^%s*(" .. pattern .. ")%s*(.-)%s*$"); + if fence ~= nil then + return fence, info; + end + end +end + parser.get_md_len = function (text) local final_string = text; local len = vim.fn.strdisplaywidth(text); @@ -127,6 +139,7 @@ parser.filter_lines = function (buffer, from, to) local code_block_indent = 0; local desc_indent = 0; + local current_fence = ""; local start = 0; @@ -181,11 +194,14 @@ parser.filter_lines = function (buffer, from, to) parent_marker = line:match("^%s*(%d+[%)%.])"); end - if line:match("(```)") and withinCodeBlock ~= true then + local fence, _ = parser.get_fence(line) + if fence and withinCodeBlock ~= true then withinCodeBlock = true; + current_fence = fence; code_block_indent = spaces_before; - elseif line:match("(```)") and withinCodeBlock == true then + elseif withinCodeBlock == true and line:match(current_fence) then withinCodeBlock = false; + current_fence = ""; elseif withinCodeBlock == true then spaces_before = code_block_indent; goto withinElement; @@ -382,18 +398,27 @@ parser.md = function (buffer, TStree, from, to) local block_start = vim.api.nvim_buf_get_lines(buffer, row_start, row_start + 1, false)[1]; local language_string, 
additional_info = "", nil; - - if block_start:match("%s*```%{%{([^%}]*)%}%}") then - language_string = block_start:match("%s*```%{%{([^%}]*)%}%}"); - additional_info = block_start:match("%s*```%{%{[^%}]*%}%}%s*(.*)$"); - elseif block_start:match("%s*```%{([^%}]*)%}") then - language_string = block_start:match("%s*```%{([^%}]*)%}"); - additional_info = block_start:match("%s*```%{[^%}]*%}%s*(.*)$"); - elseif block_start:match("%s*```(%S*)$") then - language_string = block_start:match("%s*```(%S*)$"); - elseif block_start:match("%s*```(%S*)%s*") then - language_string = block_start:match("%s*```(%S*)%s"); - additional_info = block_start:match("%s*```%S*%s+(.*)$"); + local _, info = parser.get_fence(block_start); + + if info:match("%{%{([^%}]*)%}%}") then + language_string = info:match("%%{%{([^%}]*)%}%}"); + additional_info = info:match("%{%{[^%}]*%}%}%s*(.*)$"); + elseif info:match("%{code%S*%}%s*(%S+)$") then + -- Myst code blocks (code, code-block, code-cell) + -- https://mystmd.org/guide/code#code-blocks + language_string = info:match("%{code%S*%}%s*(%S*)$"); + elseif info:match("%{([^%}]*)%}") then + -- Other {}-wrapped directive with unknown processing + language_string = info:match("%{([^%}]*)%}"); + additional_info = info:match("%{[^%}]*%}%s*(.*)$"); + elseif info:match("(%S-)%s+(.*)$") then + -- Language string and additional info + -- https://spec.commonmark.org/0.31.2/#example-143 + language_string, additional_info = info:match("(%S-)%s+(.*)$"); + elseif info:match("(%S*)%s*$") then + -- Language string without additional info + -- https://spec.commonmark.org/0.31.2/#example-143 + language_string = info:match("(%S*)%s*$"); end local code_lines = vim.api.nvim_buf_get_lines(buffer, row_start + 1, row_end - 1, false); From 534dfc8d2beee518a3be7725dd6f7bd799cf64a4 Mon Sep 17 00:00:00 2001 From: "Axel H." 
Date: Wed, 8 Jan 2025 02:44:54 +0100 Subject: [PATCH 2/2] fix(editor): fixes editor for all fences format --- lua/markview/extras/editor.lua | 34 +++++++---------------- lua/markview/languages.lua | 50 ++++++++++++++++++++++++++++++++++ lua/markview/parser.lua | 41 +++------------------------- 3 files changed, 64 insertions(+), 61 deletions(-) diff --git a/lua/markview/extras/editor.lua b/lua/markview/extras/editor.lua index 85e74da..64cdf27 100644 --- a/lua/markview/extras/editor.lua +++ b/lua/markview/extras/editor.lua @@ -44,7 +44,8 @@ editor.configuraton = { local start, col, stop, _ = TSNode:range(); local lines = vim.api.nvim_buf_get_lines(buffer, start, stop, false); - local ft = lines[1]:match("```(%S*)") or "lua"; + local _, info = languages.get_fence(lines[1]) + local ft = languages.info(info); local _l = {}; table.remove(lines, 1); @@ -54,7 +55,7 @@ editor.configuraton = { table.insert(_l, line:sub(col, #line)) end - return ft:gsub("[%{%}]", ""), _l, start + 1, stop - 1; + return ft, _l, start + 1, stop - 1; end }, appliers = { @@ -62,7 +63,8 @@ editor.configuraton = { local start, _, _, _ = TSNode:range(); local delimiter = vim.api.nvim_buf_get_lines(buffer, start, start + 1, false)[1]; - local before = delimiter:match("^(.-)```"); + local fence = languages.get_fence(delimiter) + local before = delimiter:match("^(.-)" .. fence); for l, line in ipairs(lines) do lines[l] = (before or "") .. line @@ -89,23 +91,6 @@ editor.configuraton = { end } ---- Gets the filetype from an info string ----@param delim string ----@return string -local get_ft = function (delim) - local ft = ""; - - if delim:match("^```%{%{(.-)%}%}") then - ft = languages.get_ft(delim:match("^```%{%{(.-)%}%}")); - elseif delim:match("^```%{(.-)%}") then - ft = languages.get_ft(delim:match("^```%{(.-)%}")); - elseif delim:match("^```(%S+)") then - ft = languages.get_ft(delim:match("^```(%S+)")); - end - - return ft; -end - --- Creates a new buffer when not available. 
--- Otherwise, returns the current editor buffer. ---@return integer @@ -400,10 +385,11 @@ editor.create = function () end start_delim = tostring(input); - vim.bo[editor.buffer].filetype = get_ft(start_delim); - local icon, hl = languages.get_icon(get_ft(start_delim)); - - ft = get_ft(start_delim); + local fence, info = languages.get_fence(start_delim); + ft = languages.info(info); + end_delim = fence or end_delim; + vim.bo[editor.buffer].filetype = ft + local icon, hl = languages.get_icon(ft); hl = hl .. "Fg"; diff --git a/lua/markview/languages.lua b/lua/markview/languages.lua index 7b2d299..24bf001 100644 --- a/lua/markview/languages.lua +++ b/lua/markview/languages.lua @@ -395,6 +395,28 @@ languages.patterns = { ["html"] = "HTML" }; +--- Known language info string patterns +---@type string[] +languages.info_patterns = { + -- {{ lang }} params + "%{%{([^%}]*)%}%}%s*(.*)$", + -- Myst code blocks (code, code-block, code-cell) with language + -- https://mystmd.org/guide/code#code-blocks + "%{code%S*%}%s*(%S+)$", + -- Other {}-wrapped directive with unknown processing + "%{([^%}]*)%}%s*(.*)$", + -- Language string and additional info + -- https://spec.commonmark.org/0.31.2/#example-143 + "(%S-)%s+(.*)$", + -- Language string without additional info or no language + -- https://spec.commonmark.org/0.31.2/#example-143 + "(%S*)%s*$", +} + +--- Known code-block fences +---@type string[] +languages.fences = {"`", "~"} + --- Gets the language name from a string ---@param name string ---@return string @@ -434,4 +456,32 @@ languages.get_icon = function (ft) return languages.icons[ft] or languages.icons.default, hl, sign; end +--- Extract fenced code block header +---@param line string +---@return string|nil fence the matched fence string +---@return string info the matched info string +languages.get_fence = function(line) + for _, char in pairs(languages.fences) do + --- Match any supported fence, optionally indented or quoted + local fence, info = line:match("^>*%s*(" .. 
string.rep(char, 3) .. "+)%s*(.-)%s*$"); + if fence ~= nil then + return fence, info; + end + end + return nil, "" +end + +--- Extract language and parameters from an infostring +---@param info string the info string to parse +---@return string language the extracted language +---@return string|nil # Some optional extra data +languages.info = function (info) + for _, pattern in pairs(languages.info_patterns) do + local lang, extra = info:match(pattern); + if lang then + return lang, extra; + end + end +end + return languages; diff --git a/lua/markview/parser.lua b/lua/markview/parser.lua index 40fd1d1..c724c3f 100644 --- a/lua/markview/parser.lua +++ b/lua/markview/parser.lua @@ -34,18 +34,6 @@ parser.escape_string = function (input) return input; end - ---- Extract fenced coode block header ---- return a 2-tuple (fence, infostring) -parser.get_fence = function(line) - for _, pattern in pairs({"```+", "~~~+"}) do - local fence, info = line:match("^%s*(" .. pattern .. ")%s*(.-)%s*$"); - if fence ~= nil then - return fence, info; - end - end -end - parser.get_md_len = function (text) local final_string = text; local len = vim.fn.strdisplaywidth(text); @@ -194,7 +182,7 @@ parser.filter_lines = function (buffer, from, to) parent_marker = line:match("^%s*(%d+[%)%.])"); end - local fence, _ = parser.get_fence(line) + local fence = lang.get_fence(line) if fence and withinCodeBlock ~= true then withinCodeBlock = true; current_fence = fence; @@ -219,7 +207,7 @@ parser.filter_lines = function (buffer, from, to) then spaces_before = math.max(0, spaces_before - vim.fn.strchars((parent_marker or "") .. 
" ")); - if line:match("(```)") then + if fence then code_block_indent = spaces_before; elseif insideDescription == true then align_spaces[l] = 2; @@ -397,29 +385,8 @@ parser.md = function (buffer, TStree, from, to) local block_start = vim.api.nvim_buf_get_lines(buffer, row_start, row_start + 1, false)[1]; - local language_string, additional_info = "", nil; - local _, info = parser.get_fence(block_start); - - if info:match("%{%{([^%}]*)%}%}") then - language_string = info:match("%%{%{([^%}]*)%}%}"); - additional_info = info:match("%{%{[^%}]*%}%}%s*(.*)$"); - elseif info:match("%{code%S*%}%s*(%S+)$") then - -- Myst code blocks (code, code-block, code-cell) - -- https://mystmd.org/guide/code#code-blocks - language_string = info:match("%{code%S*%}%s*(%S*)$"); - elseif info:match("%{([^%}]*)%}") then - -- Other {}-wrapped directive with unknown processing - language_string = info:match("%{([^%}]*)%}"); - additional_info = info:match("%{[^%}]*%}%s*(.*)$"); - elseif info:match("(%S-)%s+(.*)$") then - -- Language string and additional info - -- https://spec.commonmark.org/0.31.2/#example-143 - language_string, additional_info = info:match("(%S-)%s+(.*)$"); - elseif info:match("(%S*)%s*$") then - -- Language string without additional info - -- https://spec.commonmark.org/0.31.2/#example-143 - language_string = info:match("(%S*)%s*$"); - end + local _, info = lang.get_fence(block_start); + local language_string, additional_info = lang.info(info) local code_lines = vim.api.nvim_buf_get_lines(buffer, row_start + 1, row_end - 1, false);