diff options
author | Josh Rahm <joshuarahm@gmail.com> | 2024-11-19 22:57:13 +0000 |
---|---|---|
committer | Josh Rahm <joshuarahm@gmail.com> | 2024-11-19 22:57:13 +0000 |
commit | 9be89f131f87608f224f0ee06d199fcd09d32176 (patch) | |
tree | 11022dcfa9e08cb4ac5581b16734196128688d48 /scripts/text_utils.lua | |
parent | ff7ed8f586589d620a806c3758fac4a47a8e7e15 (diff) | |
parent | 88085c2e80a7e3ac29aabb6b5420377eed99b8b6 (diff) | |
download | rneovim-9be89f131f87608f224f0ee06d199fcd09d32176.tar.gz rneovim-9be89f131f87608f224f0ee06d199fcd09d32176.tar.bz2 rneovim-9be89f131f87608f224f0ee06d199fcd09d32176.zip |
Merge remote-tracking branch 'upstream/master' into mix_20240309
Diffstat (limited to 'scripts/text_utils.lua')
-rw-r--r-- | scripts/text_utils.lua | 363 |
1 files changed, 0 insertions, 363 deletions
diff --git a/scripts/text_utils.lua b/scripts/text_utils.lua deleted file mode 100644 index 75b3bfedd5..0000000000 --- a/scripts/text_utils.lua +++ /dev/null @@ -1,363 +0,0 @@ -local fmt = string.format - ---- @class nvim.text_utils.MDNode ---- @field [integer] nvim.text_utils.MDNode ---- @field type string ---- @field text? string - -local INDENTATION = 4 - -local NBSP = string.char(160) - -local M = {} - -local function contains(t, xs) - return vim.tbl_contains(xs, t) -end - ---- @param txt string ---- @param srow integer ---- @param scol integer ---- @param erow? integer ---- @param ecol? integer ---- @return string -local function slice_text(txt, srow, scol, erow, ecol) - local lines = vim.split(txt, '\n') - - if srow == erow then - return lines[srow + 1]:sub(scol + 1, ecol) - end - - if erow then - -- Trim the end - for _ = erow + 2, #lines do - table.remove(lines, #lines) - end - end - - -- Trim the start - for _ = 1, srow do - table.remove(lines, 1) - end - - lines[1] = lines[1]:sub(scol + 1) - lines[#lines] = lines[#lines]:sub(1, ecol) - - return table.concat(lines, '\n') -end - ---- @param text string ---- @return nvim.text_utils.MDNode -local function parse_md_inline(text) - local parser = vim.treesitter.languagetree.new(text, 'markdown_inline') - local root = parser:parse(true)[1]:root() - - --- @param node TSNode - --- @return nvim.text_utils.MDNode? - local function extract(node) - local ntype = node:type() - - if ntype:match('^%p$') then - return - end - - --- @type table<any,any> - local ret = { type = ntype } - ret.text = vim.treesitter.get_node_text(node, text) - - local row, col = 0, 0 - - for child, child_field in node:iter_children() do - local e = extract(child) - if e and ntype == 'inline' then - local srow, scol = child:start() - if (srow == row and scol > col) or srow > row then - local t = slice_text(ret.text, row, col, srow, scol) - if t and t ~= '' then - table.insert(ret, { type = 'text', j = true, text = t }) - end - end - row, col = child:end_() - end - - if child_field then - ret[child_field] = e - else - table.insert(ret, e) - end - end - - if ntype == 'inline' and (row > 0 or col > 0) then - local t = slice_text(ret.text, row, col) - if t and t ~= '' then - table.insert(ret, { type = 'text', text = t }) - end - end - - return ret - end - - return extract(root) or {} -end - ---- @param text string ---- @return nvim.text_utils.MDNode -local function parse_md(text) - local parser = vim.treesitter.languagetree.new(text, 'markdown', { - injections = { markdown = '' }, - }) - - local root = parser:parse(true)[1]:root() - - local EXCLUDE_TEXT_TYPE = { - list = true, - list_item = true, - section = true, - document = true, - fenced_code_block = true, - fenced_code_block_delimiter = true, - } - - --- @param node TSNode - --- @return nvim.text_utils.MDNode? - local function extract(node) - local ntype = node:type() - - if ntype:match('^%p$') or contains(ntype, { 'block_continuation' }) then - return - end - - --- @type table<any,any> - local ret = { type = ntype } - - if not EXCLUDE_TEXT_TYPE[ntype] then - ret.text = vim.treesitter.get_node_text(node, text) - end - - if ntype == 'inline' then - ret = parse_md_inline(ret.text) - end - - for child, child_field in node:iter_children() do - local e = extract(child) - if child_field then - ret[child_field] = e - else - table.insert(ret, e) - end - end - - return ret - end - - return extract(root) or {} -end - ---- @param x string ---- @param start_indent integer ---- @param indent integer ---- @param text_width integer ---- @return string -function M.wrap(x, start_indent, indent, text_width) - local words = vim.split(vim.trim(x), '%s+') - local parts = { string.rep(' ', start_indent) } --- @type string[] - local count = indent - - for i, w in ipairs(words) do - if count > indent and count + #w > text_width - 1 then - parts[#parts + 1] = '\n' - parts[#parts + 1] = string.rep(' ', indent) - count = indent - elseif i ~= 1 then - parts[#parts + 1] = ' ' - count = count + 1 - end - count = count + #w - parts[#parts + 1] = w - end - - return (table.concat(parts):gsub('%s+\n', '\n'):gsub('\n+$', '')) -end - ---- @param node nvim.text_utils.MDNode ---- @param start_indent integer ---- @param indent integer ---- @param text_width integer ---- @param level integer ---- @return string[] -local function render_md(node, start_indent, indent, text_width, level, is_list) - local parts = {} --- @type string[] - - -- For debugging - local add_tag = false - -- local add_tag = true - - local ntype = node.type - - if add_tag then - parts[#parts + 1] = '<' .. ntype .. '>' - end - - if ntype == 'text' then - parts[#parts + 1] = node.text - elseif ntype == 'html_tag' then - error('html_tag: ' .. node.text) - elseif ntype == 'inline_link' then - vim.list_extend(parts, { '*', node[1].text, '*' }) - elseif ntype == 'shortcut_link' then - if node[1].text:find('^<.*>$') then - parts[#parts + 1] = node[1].text - else - vim.list_extend(parts, { '|', node[1].text, '|' }) - end - elseif ntype == 'backslash_escape' then - parts[#parts + 1] = node.text - elseif ntype == 'emphasis' then - parts[#parts + 1] = node.text:sub(2, -2) - elseif ntype == 'code_span' then - vim.list_extend(parts, { '`', node.text:sub(2, -2):gsub(' ', NBSP), '`' }) - elseif ntype == 'inline' then - if #node == 0 then - local text = assert(node.text) - parts[#parts + 1] = M.wrap(text, start_indent, indent, text_width) - else - for _, child in ipairs(node) do - vim.list_extend(parts, render_md(child, start_indent, indent, text_width, level + 1)) - end - end - elseif ntype == 'paragraph' then - local pparts = {} - for _, child in ipairs(node) do - vim.list_extend(pparts, render_md(child, start_indent, indent, text_width, level + 1)) - end - parts[#parts + 1] = M.wrap(table.concat(pparts), start_indent, indent, text_width) - parts[#parts + 1] = '\n' - elseif ntype == 'code_fence_content' then - local lines = vim.split(node.text:gsub('\n%s*$', ''), '\n') - - local cindent = indent + INDENTATION - if level > 3 then - -- The tree-sitter markdown parser doesn't parse the code blocks indents - -- correctly in lists. Fudge it! - lines[1] = ' ' .. lines[1] -- ¯\_(ツ)_/¯ - cindent = indent - level - local _, initial_indent = lines[1]:find('^%s*') - initial_indent = initial_indent + cindent - if initial_indent < indent then - cindent = indent - INDENTATION - end - end - - for _, l in ipairs(lines) do - if #l > 0 then - parts[#parts + 1] = string.rep(' ', cindent) - parts[#parts + 1] = l - end - parts[#parts + 1] = '\n' - end - elseif ntype == 'fenced_code_block' then - parts[#parts + 1] = '>' - for _, child in ipairs(node) do - if child.type == 'info_string' then - parts[#parts + 1] = child.text - break - end - end - parts[#parts + 1] = '\n' - for _, child in ipairs(node) do - if child.type ~= 'info_string' then - vim.list_extend(parts, render_md(child, start_indent, indent, text_width, level + 1)) - end - end - parts[#parts + 1] = '<\n' - elseif ntype == 'html_block' then - local text = node.text:gsub('^<pre>help', '') - text = text:gsub('</pre>%s*$', '') - parts[#parts + 1] = text - elseif ntype == 'list_marker_dot' then - parts[#parts + 1] = node.text - elseif contains(ntype, { 'list_marker_minus', 'list_marker_star' }) then - parts[#parts + 1] = '• ' - elseif ntype == 'list_item' then - parts[#parts + 1] = string.rep(' ', indent) - local offset = node[1].type == 'list_marker_dot' and 3 or 2 - for i, child in ipairs(node) do - local sindent = i <= 2 and 0 or (indent + offset) - vim.list_extend( - parts, - render_md(child, sindent, indent + offset, text_width, level + 1, true) - ) - end - else - if node.text then - error(fmt('cannot render:\n%s', vim.inspect(node))) - end - for i, child in ipairs(node) do - local start_indent0 = i == 1 and start_indent or indent - vim.list_extend( - parts, - render_md(child, start_indent0, indent, text_width, level + 1, is_list) - ) - if ntype ~= 'list' and i ~= #node then - if (node[i + 1] or {}).type ~= 'list' then - parts[#parts + 1] = '\n' - end - end - end - end - - if add_tag then - parts[#parts + 1] = '</' .. ntype .. '>' - end - - return parts -end - ---- @param text_width integer -local function align_tags(text_width) - --- @param line string - --- @return string - return function(line) - local tag_pat = '%s*(%*.+%*)%s*$' - local tags = {} - for m in line:gmatch(tag_pat) do - table.insert(tags, m) - end - - if #tags > 0 then - line = line:gsub(tag_pat, '') - local tags_str = ' ' .. table.concat(tags, ' ') - --- @type integer - local conceal_offset = select(2, tags_str:gsub('%*', '')) - 2 - local pad = string.rep(' ', text_width - #line - #tags_str + conceal_offset) - return line .. pad .. tags_str - end - - return line - end -end - ---- @param text string ---- @param start_indent integer ---- @param indent integer ---- @param is_list? boolean ---- @return string -function M.md_to_vimdoc(text, start_indent, indent, text_width, is_list) - -- Add an extra newline so the parser can properly capture ending ``` - local parsed = parse_md(text .. '\n') - local ret = render_md(parsed, start_indent, indent, text_width, 0, is_list) - - local lines = vim.split(table.concat(ret):gsub(NBSP, ' '), '\n') - - lines = vim.tbl_map(align_tags(text_width), lines) - - local s = table.concat(lines, '\n') - - -- Reduce whitespace in code-blocks - s = s:gsub('\n+%s*>([a-z]+)\n', ' >%1\n') - s = s:gsub('\n+%s*>\n?\n', ' >\n') - - return s -end - -return M |