-- Converts Vim :help files to HTML. Validates |tag| links and document syntax (parser errors).
--
-- USAGE (GENERATE HTML):
--   1. Run `make helptags` first; this script depends on vim.fn.taglist().
--   2. nvim -V1 -es --clean +"lua require('scripts.gen_help_html').gen('./build/runtime/doc/', 'target/dir/')"
--      - Read the docstring at gen().
--   3. cd target/dir/ && jekyll serve --host 0.0.0.0
--   4. Visit http://localhost:4000/…/help.txt.html
--
-- USAGE (VALIDATE):
--   1. nvim -V1 -es +"lua require('scripts.gen_help_html').validate()"
--      - validate() is 10x faster than gen(), so it is used in CI.
--
-- SELF-TEST MODE:
--   1. nvim -V1 -es +"lua require('scripts.gen_help_html')._test()"
--
-- NOTES:
--   * gen() and validate() are the primary entrypoints. validate() only exists because gen() is
--     too slow (~1 min) to run in per-commit CI.
--   * visit_node() is the core function used by gen() to traverse the document tree and produce
--     HTML.
--   * visit_validate() is the core function used by validate().
--   * Files in `new_layout` will be generated with a "flow" layout instead of
--     preformatted/fixed-width layout.
--
-- parser bugs:
--   * Should NOT be code_block:
--       tab:xy    The 'x' is always used, then 'y' as many times as will
--                 fit. Thus "tab:>-" displays:
--                     >
--                     >-
--                     >--
--                     etc.
--
--       tab:xyz   The 'z' is always used, then 'x' is prepended, and
--                 then 'y' is used as many times as will fit. Thus
--                 "tab:<->" displays:
--                     >
--                     <>
--                     <->
--                     <-->
--                     etc.
--   * Should NOT be a "headline". Perhaps a "table" (or just "line").
--       expr5 and expr6                                         *expr5* *expr6*
--       ---------------
--       expr6 + expr6   Number addition, |List| or |Blob| concatenation *expr-+*
--       expr6 - expr6   Number subtraction                      *expr--*
--       expr6 . expr6   String concatenation                    *expr-.*
--       expr6 .. expr6  String concatenation                    *expr-..*

-- Module table.
local M = {}
local api = vim.api

-- Looked up by tag name; get_tagname() passes the stored value to vim.fs.basename().
-- Starts out unset.
local tagmap
-- Starts out unset. NOTE(review): presumably filled in by gen()/validate() — confirm.
local helpfiles
-- Unknown tag name => basename of the help file that referenced it (see visit_validate()).
local invalid_tags = {}

-- These files are generated with "flow" layout (non fixed-width, wrapped text paragraphs).
-- All other files are "legacy" files which require fixed-width layout.
local new_layout = {
  ['api.txt'] = true,
  ['channel.txt'] = true,
  ['develop.txt'] = true,
  ['nvim.txt'] = true,
  ['pi_health.txt'] = true,
  ['provider.txt'] = true,
  ['ui.txt'] = true,
}

-- TODO: treesitter gets stuck on these files...
local exclude = {
  ['filetype.txt'] = true,
  ['usr_24.txt'] = true,
}

-- Writes `text` to the file `fname`, replacing any existing content.
--
-- Raises an error naming `fname` if the file cannot be opened for writing.
local function tofile(fname, text)
  local f = io.open(fname, 'w')
  if not f then
    -- Report the path: `f` is nil on this branch and carries no information.
    error(('failed to write: %s'):format(fname))
  end
  f:write(text)
  f:close()
end

-- Escapes the HTML-significant characters "&", "<" and ">" in `s`.
--
-- NOTE(review): the previous body of this function was not valid Lua (an `if` without `then`,
-- and no closing `end` for the function), so it is rewritten here as a plain escape. If some
-- already-generated markup is supposed to pass through unescaped, re-add that early return —
-- confirm against the callers.
local function html_esc(s)
  -- "&" must be replaced first so the entities produced below are not escaped again.
  -- The outer parentheses drop gsub()'s second return value (the substitution count).
  return (s:gsub('&', '&amp;'):gsub('<', '&lt;'):gsub('>', '&gt;'))
end

-- Percent-encodes `s` for use in a URL, via Vim's iconv() and substitute().
local function url_encode(s)
  -- Credit: tpope / vim-unimpaired
  -- NOTE: these chars intentionally *not* escaped: ' ( )
  return vim.fn.substitute(vim.fn.iconv(s, 'latin1', 'utf-8'),
    [=[[^A-Za-z0-9()'_.~-]]=],
    [=[\="%".printf("%02X",char2nr(submatch(0)))]=],
    'g')
end

-- Removes the ">" and "<" chars that delineate a codeblock in Vim :help files.
local function trim_gt_lt(s)
  return s:gsub('^%s*>%s*\n', ''):gsub('\n<', '')
end

-- Replaces every tab character in `s` with 8 spaces.
local function expandtabs(s)
  return s:gsub('\t', (' '):rep(8))
end

-- Uppercases the first character of each whitespace-separated word in `s`.
--
-- Every word is emitted with a single space in front of it, so the result starts with a space.
local function to_titlecase(s)
  local text = ''
  for w in vim.gsplit(s, '[ \t]+') do
    text = ('%s %s%s'):format(text, vim.fn.toupper(w:sub(1, 1)), w:sub(2))
  end
  return text
end

-- Builds an anchor name from heading text: lowercased, whitespace runs replaced by "-".
-- Returns 'unknown' when `text` is nil.
local function to_heading_tag(text)
  -- Prepend "_" to avoid conflicts with actual :help tags.
  return text and string.format('_%s', vim.fn.tolower((text:gsub('%s+', '-')))) or 'unknown'
end

-- Gets the basename of `f` with ".txt" removed.
local function basename_noext(f)
  -- Parenthesized so that only the string (not gsub()'s count) reaches basename().
  return vim.fs.basename((f:gsub('%.txt', '')))
end

-- Returns true if `s` is empty or consists only of whitespace.
local function is_blank(s)
  return not not s:find('^%s*$')
end

-- Thin wrapper around vim.trim().
local function trim(s)
  return vim.trim(s)
end

-- Removes a leading list bullet ("-", "*" or "•") and the whitespace char that follows it.
local function trim_bullet(s)
  return s:gsub('^%s*[-*•]%s', '')
end

-- Returns a truthy value if `s` starts with a list bullet ("-", "*" or "•"), else nil.
local function startswith_bullet(s)
  return s:find('^%s*[-*•]%s')
end

-- Checks if a given line is a "noise" line that doesn't look good in HTML form.
-- Returns a truthy value (the match position) for noise lines, else nil.
local function is_noise(line)
  return (
    line:find('Type .*gO.* to see the table of contents')
    -- Title line of traditional :help pages.
    -- Example: "NVIM REFERENCE MANUAL    by ..."
    or line:find('^%s*N?VIM REFERENCE MANUAL')
    -- First line of traditional :help pages.
    -- Example: "*api.txt*    Nvim"
    or line:find('%s*%*?[a-zA-Z]+%.txt%*?%s+N?[vV]im%s*$')
    -- modeline
    -- Example: "vim:tw=78:ts=8:sw=4:sts=4:et:ft=help:norl:"
    or line:find('^%s*vi[m]%:.*ft=help')
    or line:find('^%s*vi[m]%:.*filetype=help')
  )
end

-- Creates a github issue URL at vigoux/tree-sitter-vimdoc with prefilled content.
--
-- `fname` is the source help file, `to_fname` the generated page (only their basenames are
-- used), and `sample_text` is the context that gets URL-encoded into the issue body.
local function get_bug_url_vimdoc(fname, to_fname, sample_text)
  local this_url = string.format('https://neovim.io/doc/user/%s', vim.fs.basename(to_fname))
  local bug_url = ('https://github.com/vigoux/tree-sitter-vimdoc/issues/new?labels=bug&title=parse+error%3A+'
    ..vim.fs.basename(fname)
    ..'+&body=Found+%60tree-sitter-vimdoc%60+parse+error+at%3A+'
    ..this_url
    ..'%0D%0DContext%3A%0D%0D%60%60%60%0D'
    ..url_encode(sample_text)
    ..'%0D%60%60%60')
  return bug_url
end

-- Creates a github issue URL at neovim/neovim with prefilled content.
--
-- Same arguments as get_bug_url_vimdoc(), plus the optional `token_name`, which is mentioned
-- in the issue body as the unhandled token when given.
local function get_bug_url_nvim(fname, to_fname, sample_text, token_name)
  local this_url = string.format('https://neovim.io/doc/user/%s', vim.fs.basename(to_fname))
  local bug_url = ('https://github.com/neovim/neovim/issues/new?labels=bug&title=user+docs+HTML%3A+'
    ..vim.fs.basename(fname)
    ..'+&body=%60gen_help_html.lua%60+problem+at%3A+'
    ..this_url
    ..'%0D'
    ..(token_name and '+unhandled+token%3A+%60'..token_name..'%60' or '')
    ..'%0DContext%3A%0D%0D%60%60%60%0D'
    ..url_encode(sample_text)
    ..'%0D%60%60%60')
  return bug_url
end

-- Gets a "foo.html" name from a "foo.txt" helpfile name.
-- Returns nil when `f` is nil.
local function get_helppage(f)
  if not f then
    return nil
  end
  -- Special case: help.txt is the "main landing page" of :help files, not index.txt.
  if f == 'index.txt' then
    return 'vimindex.html'
  elseif f == 'help.txt' then
    return 'index.html'
  end
  -- Parenthesized so that only the page name is returned, not gsub()'s substitution count.
  return (f:gsub('%.txt$', '.html'))
end

-- Counts leading spaces (tab=8) to decide the indent size of multiline text.
--
-- Blank lines (empty or whitespace-only) are ignored.
local function get_indent(s)
  -- Smallest indent seen so far; nil until the first non-blank line is found.
  local min_indent = nil
  for line in vim.gsplit(s, '\n') do
    if line and not is_blank(line) then
      -- Measure the leading whitespace after tabs are widened to 8 columns.
      local ws = expandtabs(line:match('^%s+') or '')
      local width = #ws
      if not min_indent or width < min_indent then
        min_indent = width
      end
    end
  end
  return min_indent or 0
end

-- Removes the common indent level, after expanding tabs to 8 spaces.
--
-- Returns the lines of `s` joined with "\n" (no trailing newline is added).
local function trim_indent(s)
  local indent_size = get_indent(s)
  -- Collect the lines and join once instead of re-concatenating the whole result per line.
  local out = {}
  for line in vim.gsplit(s, '\n') do
    local expanded = expandtabs(line)
    out[#out + 1] = expanded:sub(indent_size + 1)
  end
  return table.concat(out, '\n')
end

-- Gets raw buffer text in the node's range (+/- an offset), as a newline-delimited string.
--
-- `offset` is the number of extra lines of context requested before and after the node.
local function getbuflinestr(node, bufnr, offset)
  local line1, _, line2, _ = node:range()
  -- Clamp at the first line: getbufline() returns an empty list when its start line is < 1,
  -- which would drop the whole sample for nodes near the top of the buffer.
  line1 = math.max(0, line1 - offset)
  line2 = line2 + offset
  -- node:range() is 0-based, getbufline() is 1-based.
  local lines = vim.fn.getbufline(bufnr, line1 + 1, line2 + 1)
  return table.concat(lines, '\n')
end

-- Gets the whitespace just before `node` from the raw buffer text.
-- Needed for preformatted `old` lines.
local function getws(node, bufnr)
  local line1, c1, line2, _ = node:range()
  -- Only the first line of the node matters; fall back to '' if the buffer yields no lines.
  local raw = vim.fn.getbufline(bufnr, line1 + 1, line2 + 1)[1] or ''
  local text_before = raw:sub(1, c1)
  local leading_ws = text_before:match('%s+$') or ''
  return leading_ws
end

-- Gets the help page and the tag name that `node` refers to.
--
-- If `link` is truthy the surrounding "|" chars are stripped from the node text, otherwise the
-- surrounding "*" chars; the text of an "option" node is used as-is.
--
-- Returns `helppage, tag`. `helppage` is nil when the tag is not present in `tagmap`.
local function get_tagname(node, bufnr, link)
  local node_name = (node.named and node:named()) and node:type() or nil
  local node_text = vim.treesitter.get_node_text(node, bufnr)
  local tag
  if node_name == 'option' then
    tag = node_text
  elseif link then
    tag = node_text:gsub('^|', ''):gsub('|$', '')
  else
    tag = node_text:gsub('^%*', ''):gsub('%*$', '')
  end
  -- Unknown tags have no entry; do not hand nil to vim.fs.basename().
  local tagfile = tagmap[tag]
  local helpfile = tagfile and vim.fs.basename(tagfile) or nil  -- "api.txt"
  local helppage = get_helppage(helpfile)  -- "api.html"
  return helppage, tag
end

-- Traverses the tree at `root` and checks that |tag| links point to valid helptags.
local function visit_validate(root, level, lang_tree, opt, stats)
  -- root:  node to inspect; its named children are visited first.
  -- level: recursion depth, defaults to 0.
  -- opt:   `opt.buf` and `opt.fname` are read here.
  -- stats: `stats.parse_errors` receives one sample text per ERROR node.
  level = level or 0

  local kind = nil
  if root.named and root:named() then
    kind = root:type()
  end
  local at_top = level < 1

  if root:child_count() > 0 then
    for child, _ in root:iter_children() do
      if child:named() then
        visit_validate(child, level + 1, lang_tree, opt, stats)
      end
    end
  end

  if kind == 'ERROR' then
    -- Store the raw text to give context to the bug report.
    local sample_text = '[top level!]'
    if not at_top then
      sample_text = getbuflinestr(root, opt.buf, 3)
    end
    table.insert(stats.parse_errors, sample_text)
  elseif kind == 'hotlink' or kind == 'option' then
    local _, tagname = get_tagname(root, opt.buf, true)
    -- Record the tag only for error-free nodes whose tag is missing from `tagmap`.
    if not (root:has_error() or tagmap[tagname]) then
      invalid_tags[tagname] = vim.fs.basename(opt.fname)
    end
  end
end
-- Generates HTML from node `root` recursively. local function visit_node(root, level, lang_tree, headings, opt, stats) level = level or 0 local node_name = (root.named and root:named()) and root:type() or nil -- Previous sibling kind (string). local prev = root:prev_sibling() and (root:prev_sibling().named and root:prev_sibling():named()) and root:prev_sibling():type() or nil -- Next sibling kind (string). local next_ = root:next_sibling() and (root:next_sibling().named and root:next_sibling():named()) and root:next_sibling():type() or nil -- Parent kind (string). local parent = root:parent() and root:parent():type() or nil local text = '' local toplevel = level < 1 local function node_text() return vim.treesitter.get_node_text(root, opt.buf) end if root:child_count() == 0 then text = node_text() else -- Process children and join them with whitespace. 
for node, _ in root:iter_children() do if node:named() then local r = visit_node(node, level + 1, lang_tree, headings, opt, stats) local ws = r == '' and '' or ((opt.old and (node:type() == 'word' or not node:named())) and getws(node, opt.buf) or ' ') text = string.format('%s%s%s', text, ws, r) end end end local trimmed = trim(text) if node_name == 'help_file' then -- root node return text elseif node_name == 'word' or node_name == 'uppercase_name' then if parent == 'headline' then -- Start a new heading item, or update the current one. local n = (prev == nil or #headings == 0) and #headings + 1 or #headings headings[n] = string.format('%s%s', headings[n] and headings[n]..' ' or '', text) end return html_esc(text) elseif node_name == 'headline' then return ('
%s
'):format(html_esc(text))
elseif node_name == 'argument' then
return ('{%s}
'):format(html_esc(trimmed))
elseif node_name == 'code_block' then
return ('\n%s\n'):format(html_esc(trim_indent(trim_gt_lt(text)))) elseif node_name == 'tag' then -- anchor local _, tagname = get_tagname(root, opt.buf, false) local s = ('%s'):format(url_encode(tagname), trimmed) if parent == 'headline' and prev ~= 'tag' then -- Start the container for tags in a heading. -- This makes "justify-content:space-between" right-align the tags. --