-- Converts Vim :help files to HTML. Validates |tag| links and document syntax (parser errors).
--
-- NOTE: :helptags checks for duplicate tags, whereas this script checks _links_ (to tags).
--
-- USAGE (GENERATE HTML):
-- 1. Run `make helptags` first; this script depends on vim.fn.taglist().
-- 2. nvim -V1 -es --clean +"lua require('scripts.gen_help_html').gen('./build/runtime/doc/', 'target/dir/')"
-- - Read the docstring at gen().
-- 3. cd target/dir/ && jekyll serve --host 0.0.0.0
-- 4. Visit http://localhost:4000/…/help.txt.html
--
-- USAGE (VALIDATE):
-- 1. nvim -V1 -es +"lua require('scripts.gen_help_html').validate()"
-- - validate() is 10x faster than gen(), so it is used in CI.
--
-- SELF-TEST MODE:
-- 1. nvim -V1 -es +"lua require('scripts.gen_help_html')._test()"
--
-- NOTES:
-- * gen() and validate() are the primary entrypoints. validate() only exists because gen() is too
-- slow (~1 min) to run in per-commit CI.
-- * visit_node() is the core function used by gen() to traverse the document tree and produce HTML.
-- * visit_validate() is the core function used by validate().
-- * Files in `new_layout` will be generated with a "flow" layout instead of preformatted/fixed-width layout.
local tagmap = nil
local helpfiles = nil
local invalid_links = {}
local invalid_urls = {}
local invalid_spelling = {}
local spell_dict = {
Neovim = 'Nvim',
NeoVim = 'Nvim',
neovim = 'Nvim',
lua = 'Lua',
}
local M = {}
-- These files are generated with "flow" layout (non fixed-width, wrapped text paragraphs).
-- All other files are "legacy" files which require fixed-width layout.
local new_layout = {
['api.txt'] = true,
['channel.txt'] = true,
['develop.txt'] = true,
['luaref.txt'] = true,
['news.txt'] = true,
['nvim.txt'] = true,
['pi_health.txt'] = true,
['provider.txt'] = true,
['ui.txt'] = true,
}
-- TODO: These known invalid |links| require an update to the relevant docs.
local exclude_invalid = {
["'previewpopup'"] = "quickref.txt",
["'pvp'"] = "quickref.txt",
["'string'"] = "eval.txt",
Query = "treesitter.txt",
["eq?"] = "treesitter.txt",
["lsp-request"] = "lsp.txt",
matchit = "vim_diff.txt",
["matchit.txt"] = "help.txt",
["set!"] = "treesitter.txt",
["v:_null_blob"] = "builtin.txt",
["v:_null_dict"] = "builtin.txt",
["v:_null_list"] = "builtin.txt",
["v:_null_string"] = "builtin.txt",
["vim.lsp.buf_request()"] = "lsp.txt",
["vim.lsp.util.get_progress_messages()"] = "lsp.txt",
["vim.treesitter.start()"] = "treesitter.txt",
}
local function tofile(fname, text)
local f = io.open(fname, 'w')
if not f then
error(('failed to write: %s'):format(f))
else
f:write(text)
f:close()
end
end
local function html_esc(s)
return s:gsub(
'&', '&'):gsub(
'<', '<'):gsub(
'>', '>')
end
local function url_encode(s)
-- Credit: tpope / vim-unimpaired
-- NOTE: these chars intentionally *not* escaped: ' ( )
return vim.fn.substitute(vim.fn.iconv(s, 'latin1', 'utf-8'),
[=[[^A-Za-z0-9()'_.~-]]=],
[=[\="%".printf("%02X",char2nr(submatch(0)))]=],
'g')
end
local function expandtabs(s)
return s:gsub('\t', (' '):rep(8))
end
local function to_titlecase(s)
local text = ''
for w in vim.gsplit(s, '[ \t]+') do
text = ('%s %s%s'):format(text, vim.fn.toupper(w:sub(1, 1)), w:sub(2))
end
return text
end
local function to_heading_tag(text)
-- Prepend "_" to avoid conflicts with actual :help tags.
return text and string.format('_%s', vim.fn.tolower((text:gsub('%s+', '-')))) or 'unknown'
end
local function basename_noext(f)
return vim.fs.basename(f:gsub('%.txt', ''))
end
local function is_blank(s)
return not not s:find([[^[\t ]*$]])
end
local function trim(s, dir)
return vim.fn.trim(s, '\r\t\n ', dir or 0)
end
-- Remove common punctuation from URLs.
--
-- TODO: fix this in the parser instead... https://github.com/neovim/tree-sitter-vimdoc
--
-- @returns (fixed_url, removed_chars) where `removed_chars` is in the order found in the input.
local function fix_url(url)
local removed_chars = ''
local fixed_url = url
-- Remove up to one of each char from end of the URL, in this order.
for _, c in ipairs({ '.', ')', }) do
if fixed_url:sub(-1) == c then
removed_chars = c .. removed_chars
fixed_url = fixed_url:sub(1, -2)
end
end
return fixed_url, removed_chars
end
-- Checks if a given line is a "noise" line that doesn't look good in HTML form.
local function is_noise(line, noise_lines)
if (
-- First line is always noise.
(noise_lines ~= nil and vim.tbl_count(noise_lines) == 0)
or line:find('Type .*gO.* to see the table of contents')
-- Title line of traditional :help pages.
-- Example: "NVIM REFERENCE MANUAL by ..."
or line:find([[^%s*N?VIM[ \t]*REFERENCE[ \t]*MANUAL]])
-- First line of traditional :help pages.
-- Example: "*api.txt* Nvim"
or line:find('%s*%*?[a-zA-Z]+%.txt%*?%s+N?[vV]im%s*$')
-- modeline
-- Example: "vim:tw=78:ts=8:sw=4:sts=4:et:ft=help:norl:"
or line:find('^%s*vi[m]%:.*ft=help')
or line:find('^%s*vi[m]%:.*filetype=help')
or line:find('[*>]local%-additions[*<]')
) then
-- table.insert(stats.noise_lines, getbuflinestr(root, opt.buf, 0))
table.insert(noise_lines or {}, line)
return true
end
return false
end
-- Creates a github issue URL at neovim/tree-sitter-vimdoc with prefilled content.
local function get_bug_url_vimdoc(fname, to_fname, sample_text)
local this_url = string.format('https://neovim.io/doc/user/%s', vim.fs.basename(to_fname))
local bug_url = ('https://github.com/neovim/tree-sitter-vimdoc/issues/new?labels=bug&title=parse+error%3A+'
..vim.fs.basename(fname)
..'+&body=Found+%60tree-sitter-vimdoc%60+parse+error+at%3A+'
..this_url
..'%0D%0DContext%3A%0D%0D%60%60%60%0D'
..url_encode(sample_text)
..'%0D%60%60%60')
return bug_url
end
-- Creates a github issue URL at neovim/neovim with prefilled content.
local function get_bug_url_nvim(fname, to_fname, sample_text, token_name)
local this_url = string.format('https://neovim.io/doc/user/%s', vim.fs.basename(to_fname))
local bug_url = ('https://github.com/neovim/neovim/issues/new?labels=bug&title=user+docs+HTML%3A+'
..vim.fs.basename(fname)
..'+&body=%60gen_help_html.lua%60+problem+at%3A+'
..this_url
..'%0D'
..(token_name and '+unhandled+token%3A+%60'..token_name..'%60' or '')
..'%0DContext%3A%0D%0D%60%60%60%0D'
..url_encode(sample_text)
..'%0D%60%60%60')
return bug_url
end
-- Gets a "foo.html" name from a "foo.txt" helpfile name.
local function get_helppage(f)
if not f then
return nil
end
-- Special case: help.txt is the "main landing page" of :help files, not index.txt.
if f == 'index.txt' then
return 'vimindex.html'
elseif f == 'help.txt' then
return 'index.html'
end
return (f:gsub('%.txt$', '.html'))
end
-- Counts leading spaces (tab=8) to decide the indent size of multiline text.
--
-- Blank lines (empty or whitespace-only) are ignored.
local function get_indent(s)
local min_indent = nil
for line in vim.gsplit(s, '\n') do
if line and not is_blank(line) then
local ws = expandtabs(line:match('^%s+') or '')
min_indent = (not min_indent or ws:len() < min_indent) and ws:len() or min_indent
end
end
return min_indent or 0
end
-- Removes the common indent level, after expanding tabs to 8 spaces.
local function trim_indent(s)
local indent_size = get_indent(s)
local trimmed = ''
for line in vim.gsplit(s, '\n') do
line = expandtabs(line)
trimmed = ('%s%s\n'):format(trimmed, line:sub(indent_size + 1))
end
return trimmed:sub(1, -2)
end
-- Gets raw buffer text in the node's range (+/- an offset), as a newline-delimited string.
local function getbuflinestr(node, bufnr, offset)
local line1, _, line2, _ = node:range()
line1 = line1 - offset
line2 = line2 + offset
local lines = vim.fn.getbufline(bufnr, line1 + 1, line2 + 1)
return table.concat(lines, '\n')
end
local function get_tagname(node, bufnr)
local text = vim.treesitter.get_node_text(node, bufnr)
local tag = (node:type() == 'optionlink' or node:parent():type() == 'optionlink') and ("'%s'"):format(text) or text
local helpfile = vim.fs.basename(tagmap[tag]) or nil -- "api.txt"
local helppage = get_helppage(helpfile) -- "api.html"
return helppage, tag
end
-- Returns true if the given invalid tagname is a false positive.
local function ignore_invalid(s)
return not not (
exclude_invalid[s]
-- Strings like |~/====| appear in various places and the parser thinks they are links, but they
-- are just table borders.
or s:find('===')
or s:find('---')
)
end
local function ignore_parse_error(s)
-- Ignore parse errors for unclosed codespan/optionlink/tag.
-- This is common in vimdocs and is treated as plaintext by :help.
return s:find("^[`'|*]")
end
local function has_ancestor(node, ancestor_name)
local p = node
while true do
p = p:parent()
if not p or p:type() == 'help_file' then
break
elseif p:type() == ancestor_name then
return true
end
end
return false
end
local function validate_link(node, bufnr, fname)
local helppage, tagname = get_tagname(node:child(1), bufnr)
local ignored = false
if not tagmap[tagname] then
ignored = has_ancestor(node, 'column_heading') or node:has_error() or ignore_invalid(tagname)
if not ignored then
invalid_links[tagname] = vim.fs.basename(fname)
end
end
return helppage, tagname, ignored
end
-- Traverses the tree at `root` and checks that |tag| links point to valid helptags.
local function visit_validate(root, level, lang_tree, opt, stats)
level = level or 0
local node_name = (root.named and root:named()) and root:type() or nil
local toplevel = level < 1
local function node_text(node)
return vim.treesitter.get_node_text(node or root, opt.buf)
end
local text = trim(node_text())
if root:child_count() > 0 then
for node, _ in root:iter_children() do
if node:named() then
visit_validate(node, level + 1, lang_tree, opt, stats)
end
end
end
if node_name == 'ERROR' then
if ignore_parse_error(text) then
return
end
-- Store the raw text to give context to the error report.
local sample_text = not toplevel and getbuflinestr(root, opt.buf, 3) or '[top level!]'
table.insert(stats.parse_errors, sample_text)
elseif node_name == 'word' or node_name == 'uppercase_name' then
if spell_dict[text] then
if not invalid_spelling[text] then
invalid_spelling[text] = { vim.fs.basename(opt.fname) }
else
table.insert(invalid_spelling[text], vim.fs.basename(opt.fname))
end
end
elseif node_name == 'url' then
if text:find('http%:') then
invalid_urls[text] = vim.fs.basename(opt.fname)
end
elseif node_name == 'taglink' or node_name == 'optionlink' then
local _, _, _ = validate_link(root, opt.buf, opt.fname)
end
end
-- Generates HTML from node `root` recursively.
local function visit_node(root, level, lang_tree, headings, opt, stats)
level = level or 0
local node_name = (root.named and root:named()) and root:type() or nil
-- Previous sibling kind (string).
local prev = root:prev_sibling() and (root:prev_sibling().named and root:prev_sibling():named()) and root:prev_sibling():type() or nil
-- Next sibling kind (string).
local next_ = root:next_sibling() and (root:next_sibling().named and root:next_sibling():named()) and root:next_sibling():type() or nil
-- Parent kind (string).
local parent = root:parent() and root:parent():type() or nil
local text = ''
local trimmed
local function node_text(node)
return vim.treesitter.get_node_text(node or root, opt.buf)
end
-- Gets leading whitespace of the current node.
local function ws()
return node_text():match('^%s+') or ''
end
if root:named_child_count() == 0 or node_name == 'ERROR' then
text = node_text()
trimmed = html_esc(trim(text))
text = html_esc(text)
else
-- Process children and join them with whitespace.
for node, _ in root:iter_children() do
if node:named() then
local r = visit_node(node, level + 1, lang_tree, headings, opt, stats)
text = string.format('%s%s', text, r)
end
end
trimmed = trim(text)
end
if node_name == 'help_file' then -- root node
return text
elseif node_name == 'url' then
local fixed_url, removed_chars = fix_url(trimmed)
return ('%s%s%s'):format(ws(), fixed_url, fixed_url, removed_chars)
elseif node_name == 'word' or node_name == 'uppercase_name' then
return text
elseif node_name == 'h1' or node_name == 'h2' or node_name == 'h3' then
if is_noise(text, stats.noise_lines) then
return '' -- Discard common "noise" lines.
end
-- Remove "===" and tags from ToC text.
local hname = (node_text():gsub('%-%-%-%-+', ''):gsub('%=%=%=%=+', ''):gsub('%*.*%*', ''))
if node_name == 'h1' or #headings == 0 then
table.insert(headings, { name = hname, subheadings = {}, })
else
table.insert(headings[#headings].subheadings, { name = hname, subheadings = {}, })
end
local el = node_name == 'h1' and 'h2' or 'h3'
return ('<%s class="help-heading">%s%s>\n'):format(to_heading_tag(hname), el, text, el)
elseif node_name == 'column_heading' or node_name == 'column_name' then
if root:has_error() then
return text
end
return ('
%s%s
'):format(ws(), trimmed)
elseif node_name == 'block' then
if is_blank(text) then
return ''
end
if opt.old then
-- XXX: Treat "old" docs as preformatted: they use indentation for layout.
-- Trim trailing newlines to avoid too much whitespace between divs.
return ('
%s
\n'):format(trim(text, 2))
end
return string.format('
\n%s\n
\n', text)
elseif node_name == 'line' then
if parent ~= 'codeblock' and (is_blank(text) or is_noise(text, stats.noise_lines)) then
return '' -- Discard common "noise" lines.
end
-- XXX: Avoid newlines (too much whitespace) after block elements in old (preformatted) layout.
local div = opt.old and root:child(0) and vim.tbl_contains({'column_heading', 'h1', 'h2', 'h3'}, root:child(0):type())
return string.format('%s%s', div and trim(text) or text, div and '' or '\n')
elseif node_name == 'line_li' then
local sib = root:prev_sibling()
local prev_li = sib and sib:type() == 'line_li'
if not prev_li then
opt.indent = 1
else
-- The previous listitem _sibling_ is _logically_ the _parent_ if it is indented less.
local parent_indent = get_indent(node_text(sib))
local this_indent = get_indent(node_text())
if this_indent > parent_indent then
opt.indent = opt.indent + 1
elseif this_indent < parent_indent then
opt.indent = math.max(1, opt.indent - 1)
end
end
local margin = opt.indent == 1 and '' or ('margin-left: %drem;'):format((1.5 * opt.indent))
return string.format('
%s
', margin, text)
elseif node_name == 'taglink' or node_name == 'optionlink' then
local helppage, tagname, ignored = validate_link(root, opt.buf, opt.fname)
if ignored then
return text
end
return ('%s%s'):format(ws(), helppage, url_encode(tagname), html_esc(tagname))
elseif vim.tbl_contains({'codespan', 'keycode'}, node_name) then
if root:has_error() then
return text
end
return ('%s%s'):format(ws(), trimmed)
elseif node_name == 'argument' then
return ('%s{%s}'):format(ws(), text)
elseif node_name == 'codeblock' then
if is_blank(text) then
return ''
end
return ('
%s
'):format(trim(trim_indent(text), 2))
elseif node_name == 'tag' then -- anchor
if root:has_error() then
return text
end
local in_heading = vim.tbl_count({'h1', 'h2', 'h3'}, parent)
local cssclass = (not in_heading and get_indent(node_text()) > 8) and 'help-tag-right' or 'help-tag'
local tagname = node_text(root:child(1))
if vim.tbl_count(stats.first_tags) < 2 then
-- First 2 tags in the doc will be anchored at the main heading.
table.insert(stats.first_tags, tagname)
return ''
end
local s = ('%s%s'):format(ws(), url_encode(tagname), cssclass, trimmed)
if in_heading and prev ~= 'tag' then
-- Start the container for tags in a heading.
-- This makes "justify-content:space-between" right-align the tags.
--
foo bartag1 tag2
return string.format('%s', s)
elseif in_heading and next_ == nil then
-- End the container for tags in a heading.
return string.format('%s', s)
end
return s
elseif node_name == 'ERROR' then
if ignore_parse_error(trimmed) then
return text
end
-- Store the raw text to give context to the bug report.
local sample_text = level > 0 and getbuflinestr(root, opt.buf, 3) or '[top level!]'
table.insert(stats.parse_errors, sample_text)
return ('%s'):format(
get_bug_url_vimdoc(opt.fname, opt.to_fname, sample_text), trimmed)
else -- Unknown token.
local sample_text = level > 0 and getbuflinestr(root, opt.buf, 3) or '[top level!]'
return ('%s'):format(
node_name, get_bug_url_nvim(opt.fname, opt.to_fname, sample_text, node_name), trimmed), ('unknown-token:"%s"'):format(node_name)
end
end
local function get_helpfiles(include)
local dir = './build/runtime/doc'
local rv = {}
for f, type in vim.fs.dir(dir) do
if (vim.endswith(f, '.txt')
and type == 'file'
and (not include or vim.tbl_contains(include, f))) then
local fullpath = vim.fn.fnamemodify(('%s/%s'):format(dir, f), ':p')
table.insert(rv, fullpath)
end
end
return rv
end
-- Populates the helptags map.
local function get_helptags(help_dir)
local m = {}
-- Load a random help file to convince taglist() to do its job.
vim.cmd(string.format('split %s/api.txt', help_dir))
vim.cmd('lcd %:p:h')
for _, item in ipairs(vim.fn.taglist('.*')) do
if vim.endswith(item.filename, '.txt') then
m[item.name] = item.filename
end
end
vim.cmd('q!')
return m
end
-- Use the help.so parser defined in the build, not whatever happens to be installed on the system.
local function ensure_runtimepath()
if not vim.o.runtimepath:find('build/lib/nvim/') then
vim.cmd[[set runtimepath^=./build/lib/nvim/]]
end
end
-- Opens `fname` in a buffer and gets a treesitter parser for the buffer contents.
--
-- @returns lang_tree, bufnr
local function parse_buf(fname)
local buf
if type(fname) == 'string' then
vim.cmd('split '..vim.fn.fnameescape(fname)) -- Filename.
buf = vim.api.nvim_get_current_buf()
else
buf = fname
vim.cmd('sbuffer '..tostring(fname)) -- Buffer number.
end
-- vim.treesitter.require_language('help', './build/lib/nvim/parser/help.so')
local lang_tree = vim.treesitter.get_parser(buf, 'help')
return lang_tree, buf
end
-- Validates one :help file `fname`:
-- - checks that |tag| links point to valid helptags.
-- - recursively counts parse errors ("ERROR" nodes)
--
-- @returns { invalid_links: number, parse_errors: number }
local function validate_one(fname)
local stats = {
parse_errors = {},
}
local lang_tree, buf = parse_buf(fname)
for _, tree in ipairs(lang_tree:trees()) do
visit_validate(tree:root(), 0, tree, { buf = buf, fname = fname, }, stats)
end
lang_tree:destroy()
vim.cmd.close()
return stats
end
-- Generates HTML from one :help file `fname` and writes the result to `to_fname`.
--
-- @param fname Source :help file
-- @param to_fname Destination .html file
-- @param old boolean Preformat paragraphs (for old :help files which are full of arbitrary whitespace)
--
-- @returns html, stats
local function gen_one(fname, to_fname, old, commit)
local stats = {
noise_lines = {},
parse_errors = {},
first_tags = {}, -- Track the first few tags in doc.
}
local lang_tree, buf = parse_buf(fname)
local headings = {} -- Headings (for ToC). 2-dimensional: h1 contains h2/h3.
local title = to_titlecase(basename_noext(fname))
local html = ([[
%s - Neovim docs
]]):format(title)
local logo_svg = [[
]]
local main = ''
for _, tree in ipairs(lang_tree:trees()) do
main = main .. (visit_node(tree:root(), 0, tree, headings,
{ buf = buf, old = old, fname = fname, to_fname = to_fname, indent = 1, },
stats))
end
main = ([[
]]
local n = 0 -- Count of all headings + subheadings.
for _, h1 in ipairs(headings) do n = n + 1 + #h1.subheadings end
for _, h1 in ipairs(headings) do
toc = toc .. ('
%s\n'):format(to_heading_tag(h1.name), h1.name)
if n < 30 or #headings < 10 then -- Show subheadings only if there aren't too many.
for _, h2 in ipairs(h1.subheadings) do
toc = toc .. ('