-- Converts Vim :help files to HTML. Validates |tag| links and document syntax (parser errors). -- -- USAGE (GENERATE HTML): -- 1. Run `make helptags` first; this script depends on vim.fn.taglist(). -- 2. nvim -V1 -es --clean +"lua require('scripts.gen_help_html').gen('./build/runtime/doc/', 'target/dir/')" -- - Read the docstring at gen(). -- 3. cd target/dir/ && jekyll serve --host 0.0.0.0 -- 4. Visit http://localhost:4000/…/help.txt.html -- -- USAGE (VALIDATE): -- 1. nvim -V1 -es +"lua require('scripts.gen_help_html').validate()" -- - validate() is 10x faster than gen(), so it is used in CI. -- -- SELF-TEST MODE: -- 1. nvim -V1 -es +"lua require('scripts.gen_help_html')._test()" -- -- NOTES: -- * gen() and validate() are the primary entrypoints. validate() only exists because gen() is too -- slow (~1 min) to run in per-commit CI. -- * visit_node() is the core function used by gen() to traverse the document tree and produce HTML. -- * visit_validate() is the core function used by validate(). -- * Files in `new_layout` will be generated with a "flow" layout instead of preformatted/fixed-width layout. -- -- parser bugs: -- * Should NOT be code_block: -- tab:xy The 'x' is always used, then 'y' as many times as will -- fit. Thus "tab:>-" displays: -- > -- >- -- >-- -- etc. -- -- tab:xyz The 'z' is always used, then 'x' is prepended, and -- then 'y' is used as many times as will fit. Thus -- "tab:<->" displays: -- > -- <> -- <-> -- <--> -- etc. -- * Should NOT be a "headline". Perhaps a "table" (or just "line"). -- expr5 and expr6 *expr5* *expr6* -- --------------- -- expr6 + expr6 Number addition, |List| or |Blob| concatenation *expr-+* -- expr6 - expr6 Number subtraction *expr--* -- expr6 . expr6 String concatenation *expr-.* -- expr6 .. expr6 String concatenation *expr-..* local tagmap = nil local helpfiles = nil local invalid_tags = {} local commit = '?' 
local api = vim.api
local M = {}

-- These files are generated with "flow" layout (non fixed-width, wrapped text paragraphs).
-- All other files are "legacy" files which require fixed-width layout.
local new_layout = {
  ['api.txt'] = true,
  ['channel.txt'] = true,
  ['develop.txt'] = true,
  ['nvim.txt'] = true,
  ['pi_health.txt'] = true,
  ['provider.txt'] = true,
  ['ui.txt'] = true,
}

-- TODO: treesitter gets stuck on these files...
local exclude = {
  ['filetype.txt'] = true,
  ['usr_24.txt'] = true,
}

-- Writes `text` to the file `fname`, replacing any existing content.
--
-- Raises an error that names the file (and the reason given by io.open) if it
-- cannot be opened for writing.
local function tofile(fname, text)
  local f, err = io.open(fname, 'w')
  if not f then
    -- The handle is nil on failure, so report the path and io.open's message instead.
    error(('failed to write: %s (%s)'):format(fname, tostring(err)))
  end
  f:write(text)
  f:close()
end

-- Escapes the HTML-significant characters "&", "<" and ">" in `s`.
--
-- NOTE(review): this function's body was damaged in the copy of the file under review (only
-- an `s:find(` guard and a trailing '>' replacement survived). The guard below is a
-- reconstruction: it assumes parse-error markup produced by this script starts with
-- `<a class="parse-error"` and must not be escaped a second time. Confirm the marker against
-- the ERROR branch of visit_node().
local function html_esc(s)
  if s:find('<a class="parse%-error"') then
    return s
  end
  -- "&" must be replaced first so the entities added below are not escaped again.
  -- Parenthesized to drop gsub's replacement count.
  return (s:gsub('&', '&amp;'):gsub('<', '&lt;'):gsub('>', '&gt;'))
end

-- Percent-encodes `s` for use in a URL.
local function url_encode(s)
  -- Credit: tpope / vim-unimpaired
  -- NOTE: these chars intentionally *not* escaped: ' ( )
  return vim.fn.substitute(vim.fn.iconv(s, 'latin1', 'utf-8'),
    [=[[^A-Za-z0-9()'_.~-]]=],
    [=[\="%".printf("%02X",char2nr(submatch(0)))]=],
    'g')
end

-- Removes the ">" and "<" chars that delineate a codeblock in Vim :help files.
local function trim_gt_lt(s)
  -- Parenthesized so only the string is returned, not gsub's replacement count.
  return (s:gsub('^%s*>%s*\n', ''):gsub('\n<', ''))
end

-- Replaces every tab in `s` with 8 spaces (a plain substitution, not tab-stop alignment).
local function expandtabs(s)
  return (s:gsub('\t', (' '):rep(8)))
end

-- Uppercases the first character of each space/tab-separated word in `s` and joins the
-- words with single spaces (no leading or trailing separator).
local function to_titlecase(s)
  local words = {}
  for w in vim.gsplit(s, '[ \t]+') do
    -- gsplit yields empty pieces for leading/trailing separators; skip them.
    if w ~= '' then
      words[#words + 1] = vim.fn.toupper(w:sub(1, 1)) .. w:sub(2)
    end
  end
  return table.concat(words, ' ')
end

-- Builds an anchor name for a heading: lowercased, whitespace runs replaced by "-".
-- Returns 'unknown' when `text` is nil.
local function to_heading_tag(text)
  -- Prepend "_" to avoid conflicts with actual :help tags.
  return text and string.format('_%s', vim.fn.tolower((text:gsub('%s+', '-')))) or 'unknown'
end

-- Strips ".txt" from `f` and returns the last path component.
local function basename_noext(f)
  local stem = f:gsub('%.txt', '') -- single-target assignment drops gsub's count
  return vim.fs.basename(stem)
end

-- Returns true if `s` is empty or contains only whitespace.
local function is_blank(s)
  return not not s:find('^%s*$')
end

-- Trims leading and trailing whitespace.
local function trim(s)
  return vim.trim(s)
end

-- Removes a leading list bullet ("-", "*" or "•", followed by one whitespace char) from `s`.
--
-- Lua patterns work on bytes, so the multi-byte "•" cannot be a member of a character
-- class: inside "[...]" it only ever matches one of its bytes. It is therefore matched as a
-- literal sequence in a separate pattern.
local function trim_bullet(s)
  local trimmed, n = s:gsub('^%s*[-*]%s', '')
  if n == 0 then
    trimmed = s:gsub('^%s*•%s', '')
  end
  return trimmed
end

-- Returns a truthy value (the match start index) if `s` begins with a list bullet
-- ("-", "*" or "•", followed by whitespace), else nil.
local function startswith_bullet(s)
  -- See trim_bullet() for why "•" needs its own pattern.
  return s:find('^%s*[-*]%s') or s:find('^%s*•%s')
end

-- Checks if a given line is a "noise" line that doesn't look good in HTML form.
local function is_noise(line)
  -- Returns a truthy value (the match position from string.find) when `line` matches one of
  -- the known boilerplate patterns below, otherwise nil.
  return (
    line:find('Type .*gO.* to see the table of contents')
    -- Title line of traditional :help pages.
    -- Example: "NVIM REFERENCE MANUAL by ..."
    or line:find('^%s*N?VIM REFERENCE MANUAL')
    -- First line of traditional :help pages.
    -- Example: "*api.txt* Nvim"
    or line:find('%s*%*?[a-zA-Z]+%.txt%*?%s+N?[vV]im%s*$')
    -- modeline
    -- Example: "vim:tw=78:ts=8:sw=4:sts=4:et:ft=help:norl:"
    or line:find('^%s*vi[m]%:.*ft=help')
    or line:find('^%s*vi[m]%:.*filetype=help')
  )
end

-- Creates a github issue URL at vigoux/tree-sitter-vimdoc with prefilled content.
--
-- @param fname       Source :help file; only its basename is put in the issue title.
-- @param to_fname    Generated .html file; its basename forms the neovim.io page URL.
-- @param sample_text Raw text shown as context in the issue body (URL-encoded here).
-- @returns URL string
local function get_bug_url_vimdoc(fname, to_fname, sample_text)
  local this_url = string.format('https://neovim.io/doc/user/%s', vim.fs.basename(to_fname))
  -- The fixed pieces are already percent-encoded; only the sample text is encoded here.
  return table.concat({
    'https://github.com/vigoux/tree-sitter-vimdoc/issues/new?labels=bug&title=parse+error%3A+',
    vim.fs.basename(fname),
    '+&body=Found+%60tree-sitter-vimdoc%60+parse+error+at%3A+',
    this_url,
    '%0D%0DContext%3A%0D%0D%60%60%60%0D',
    url_encode(sample_text),
    '%0D%60%60%60',
  })
end

-- Creates a github issue URL at neovim/neovim with prefilled content.
--
-- @param fname       Source :help file; only its basename is put in the issue title.
-- @param to_fname    Generated .html file; its basename forms the neovim.io page URL.
-- @param sample_text Raw text shown as context in the issue body (URL-encoded here).
-- @param token_name  Optional name of an unhandled token to mention in the body.
-- @returns URL string
local function get_bug_url_nvim(fname, to_fname, sample_text, token_name)
  local this_url = string.format('https://neovim.io/doc/user/%s', vim.fs.basename(to_fname))
  local token_part = token_name and ('+unhandled+token%3A+%60' .. token_name .. '%60') or ''
  return table.concat({
    'https://github.com/neovim/neovim/issues/new?labels=bug&title=user+docs+HTML%3A+',
    vim.fs.basename(fname),
    '+&body=%60gen_help_html.lua%60+problem+at%3A+',
    this_url,
    '%0D',
    token_part,
    '%0DContext%3A%0D%0D%60%60%60%0D',
    url_encode(sample_text),
    '%0D%60%60%60',
  })
end

-- Gets a "foo.html" name from a "foo.txt" helpfile name.
-- Returns nil when `f` is nil.
local function get_helppage(f)
  if not f then
    return nil
  end
  -- Special case: help.txt is the "main landing page" of :help files, not index.txt.
  if f == 'index.txt' then
    return 'vimindex.html'
  elseif f == 'help.txt' then
    return 'index.html'
  end
  -- Parenthesized so callers get only the name, not gsub's replacement count.
  return (f:gsub('%.txt$', '.html'))
end

-- Counts leading spaces (tab=8) to decide the indent size of multiline text.
--
-- Blank lines (empty or whitespace-only) are ignored.
-- Returns 0 when there is no non-blank line.
local function get_indent(s)
  local min_indent = nil
  for line in vim.gsplit(s, '\n') do
    if line and not is_blank(line) then
      local ws = expandtabs(line:match('^%s+') or '')
      local width = ws:len()
      if not min_indent or width < min_indent then
        min_indent = width
      end
    end
  end
  return min_indent or 0
end

-- Removes the common indent level, after expanding tabs to 8 spaces.
local function trim_indent(s)
  local indent_size = get_indent(s)
  local out = {}
  for line in vim.gsplit(s, '\n') do
    -- Lines shorter than the common indent (e.g. blank ones) become empty strings.
    out[#out + 1] = expandtabs(line):sub(indent_size + 1)
  end
  -- Joining with "\n" gives the same text as appending "\n" to every line and dropping the
  -- final one, without re-copying the accumulated string on each iteration.
  return table.concat(out, '\n')
end

-- Gets raw buffer text in the node's range (+/- an offset), as a newline-delimited string.
local function getbuflinestr(node, bufnr, offset)
  -- node:range() rows are 0-based; getbufline() takes 1-based line numbers.
  local line1, _, line2, _ = node:range()
  line1 = line1 - offset
  line2 = line2 + offset
  local lines = vim.fn.getbufline(bufnr, line1 + 1, line2 + 1)
  return table.concat(lines, '\n')
end

-- Gets the whitespace just before `node` from the raw buffer text.
-- Needed for preformatted `old` lines.
local function getws(node, bufnr) local line1, c1, line2, _ = node:range() local raw = vim.fn.getbufline(bufnr, line1 + 1, line2 + 1)[1] local text_before = raw:sub(1, c1) local leading_ws = text_before:match('%s+$') or '' return leading_ws end local function get_tagname(node, bufnr, link) local node_name = (node.named and node:named()) and node:type() or nil local node_text = vim.treesitter.get_node_text(node, bufnr) local tag = ((node_name == 'option' and node_text) or (link and node_text:gsub('^|', ''):gsub('|$', '') or node_text:gsub('^%*', ''):gsub('%*$', ''))) local helpfile = tag and vim.fs.basename(tagmap[tag]) or nil -- "api.txt" local helppage = get_helppage(helpfile) -- "api.html" return helppage, tag end -- Traverses the tree at `root` and checks that |tag| links point to valid helptags. local function visit_validate(root, level, lang_tree, opt, stats) level = level or 0 local node_name = (root.named and root:named()) and root:type() or nil local toplevel = level < 1 if root:child_count() > 0 then for node, _ in root:iter_children() do if node:named() then visit_validate(node, level + 1, lang_tree, opt, stats) end end end if node_name == 'ERROR' then -- Store the raw text to give context to the bug report. local sample_text = not toplevel and getbuflinestr(root, opt.buf, 3) or '[top level!]' table.insert(stats.parse_errors, sample_text) elseif node_name == 'hotlink' or node_name == 'option' then local _, tagname = get_tagname(root, opt.buf, true) if not root:has_error() and not tagmap[tagname] then invalid_tags[tagname] = vim.fs.basename(opt.fname) end end end -- Generates HTML from node `root` recursively. local function visit_node(root, level, lang_tree, headings, opt, stats) level = level or 0 local node_name = (root.named and root:named()) and root:type() or nil -- Previous sibling kind (string). 
local prev = root:prev_sibling() and (root:prev_sibling().named and root:prev_sibling():named()) and root:prev_sibling():type() or nil -- Next sibling kind (string). local next_ = root:next_sibling() and (root:next_sibling().named and root:next_sibling():named()) and root:next_sibling():type() or nil -- Parent kind (string). local parent = root:parent() and root:parent():type() or nil local text = '' local toplevel = level < 1 local function node_text() return vim.treesitter.get_node_text(root, opt.buf) end if root:child_count() == 0 then text = node_text() else -- Process children and join them with whitespace. for node, _ in root:iter_children() do if node:named() then local r = visit_node(node, level + 1, lang_tree, headings, opt, stats) local ws = r == '' and '' or ((opt.old and (node:type() == 'word' or not node:named())) and getws(node, opt.buf) or ' ') text = string.format('%s%s%s', text, ws, r) end end end local trimmed = trim(text) if node_name == 'help_file' then -- root node return text elseif node_name == 'word' or node_name == 'uppercase_name' then if parent == 'headline' then -- Start a new heading item, or update the current one. local n = (prev == nil or #headings == 0) and #headings + 1 or #headings headings[n] = string.format('%s%s', headings[n] and headings[n]..' ' or '', text) end return html_esc(text) elseif node_name == 'headline' then return ('

%s

\n'):format(to_heading_tag(headings[#headings]), text) elseif node_name == 'column_heading' or node_name == 'column_name' then return ('

%s

\n'):format(trimmed) elseif node_name == 'line' then -- TODO: remove these "sibling inspection" hacks once the parser provides structured info -- about paragraphs and listitems: https://github.com/vigoux/tree-sitter-vimdoc/issues/12 local next_text = root:next_sibling() and vim.treesitter.get_node_text(root:next_sibling(), opt.buf) or '' local li = startswith_bullet(text) -- Listitem? local next_li = startswith_bullet(next_text) -- Next is listitem? -- Close the paragraph/listitem if the next sibling is not a line. local close = (next_ ~= 'line' or next_li or is_blank(next_text)) and '\n' or '' -- HACK: discard common "noise" lines. if is_noise(text) then table.insert(stats.noise_lines, getbuflinestr(root, opt.buf, 0)) return (opt.old or prev ~= 'line') and '' or close end if opt.old then -- XXX: Treat old docs as preformatted. Until those docs are "fixed" or we get better info -- from tree-sitter-vimdoc, this avoids broken layout for legacy docs. return ('
%s
\n'):format(text) end if li then return string.format('
%s%s', trim_bullet(expandtabs(text)), close) end if prev ~= 'line' then -- Start a new paragraph. return string.format('
%s%s', expandtabs(text), close) end -- Continue in the current paragraph/listitem. return string.format('%s%s', expandtabs(text), close) elseif node_name == 'hotlink' or node_name == 'option' then local helppage, tagname = get_tagname(root, opt.buf, true) if not root:has_error() and not tagmap[tagname] then invalid_tags[tagname] = vim.fs.basename(opt.fname) end return ('%s'):format(helppage, url_encode(tagname), html_esc(tagname)) elseif node_name == 'backtick' then return ('%s'):format(html_esc(text)) elseif node_name == 'argument' then return ('{%s}'):format(html_esc(trimmed)) elseif node_name == 'code_block' then return ('
\n%s
\n'):format(html_esc(trim_indent(trim_gt_lt(text)))) elseif node_name == 'tag' then -- anchor local _, tagname = get_tagname(root, opt.buf, false) local s = ('%s'):format(url_encode(tagname), trimmed) if parent == 'headline' and prev ~= 'tag' then -- Start the container for tags in a heading. -- This makes "justify-content:space-between" right-align the tags. --

foo bartag1 tag2

return string.format('%s', s) elseif parent == 'headline' and next_ == nil then -- End the container for tags in a heading. return string.format('%s', s) end return s elseif node_name == 'ERROR' then -- Store the raw text to give context to the bug report. local sample_text = not toplevel and getbuflinestr(root, opt.buf, 3) or '[top level!]' table.insert(stats.parse_errors, sample_text) if prev == 'ERROR' then -- Avoid trashing the text with cascading errors. return trimmed, ('parse-error:"%s"'):format(node_text()) end return ('%s'):format( get_bug_url_vimdoc(opt.fname, opt.to_fname, sample_text), trimmed) else -- Unknown token. local sample_text = not toplevel and getbuflinestr(root, opt.buf, 3) or '[top level!]' return ('%s'):format( node_name, get_bug_url_nvim(opt.fname, opt.to_fname, sample_text, node_name), trimmed), ('unknown-token:"%s"'):format(node_name) end end local function get_helpfiles(include) local dir = './build/runtime/doc' local rv = {} for f, type in vim.fs.dir(dir) do if (vim.endswith(f, '.txt') and type == 'file' and (not include or vim.tbl_contains(include, f)) and (not exclude[f])) then local fullpath = vim.fn.fnamemodify(('%s/%s'):format(dir, f), ':p') table.insert(rv, fullpath) end end return rv end -- Populates the helptags map. local function get_helptags(help_dir) local m = {} -- Load a random help file to convince taglist() to do its job. vim.cmd(string.format('split %s/api.txt', help_dir)) vim.cmd('lcd %:p:h') for _, item in ipairs(vim.fn.taglist('.*')) do if vim.endswith(item.filename, '.txt') then m[item.name] = item.filename end end vim.cmd('q!') return m end -- Opens `fname` in a buffer and gets a treesitter parser for the buffer contents. -- -- @returns lang_tree, bufnr local function parse_buf(fname) local buf if type(fname) == 'string' then vim.cmd('split '..vim.fn.fnameescape(fname)) -- Filename. buf = api.nvim_get_current_buf() else buf = fname vim.cmd('sbuffer '..tostring(fname)) -- Buffer number. 
end -- vim.treesitter.require_language('help', './build/lib/nvim/parser/help.so') local lang_tree = vim.treesitter.get_parser(buf, 'help') return lang_tree, buf end -- Validates one :help file `fname`: -- - checks that |tag| links point to valid helptags. -- - recursively counts parse errors ("ERROR" nodes) -- -- @returns { invalid_tags: number, parse_errors: number } local function validate_one(fname) local stats = { invalid_tags = {}, parse_errors = {}, } local lang_tree, buf = parse_buf(fname) for _, tree in ipairs(lang_tree:trees()) do visit_validate(tree:root(), 0, tree, { buf = buf, fname = fname, }, stats) end lang_tree:destroy() vim.cmd.close() return { invalid_tags = invalid_tags, parse_errors = stats.parse_errors, } end -- Generates HTML from one :help file `fname` and writes the result to `to_fname`. -- -- @param fname Source :help file -- @param to_fname Destination .html file -- @param old boolean Preformat paragraphs (for old :help files which are full of arbitrary whitespace) -- -- @returns html, stats local function gen_one(fname, to_fname, old) local stats = { noise_lines = {}, parse_errors = {}, } local lang_tree, buf = parse_buf(fname) local headings = {} -- Headings (for ToC). local title = to_titlecase(basename_noext(fname)) local html = ([[ %s - Neovim docs ]]):format(title) local logo_svg = [[ Neovim ]] local main = ([[

%s

Nvim help pages, updated automatically from source. Parsing by tree-sitter-vimdoc.

]]):format(logo_svg, title, vim.fs.basename(fname)) for _, tree in ipairs(lang_tree:trees()) do main = main .. (visit_node(tree:root(), 0, tree, headings, { buf = buf, old = old, fname = fname, to_fname = to_fname }, stats)) end main = main .. '
\n' local toc = [[

]] for _, heading in ipairs(headings) do toc = toc .. ('\n'):format(to_heading_tag(heading), heading) end toc = toc .. '
\n' local bug_url = get_bug_url_nvim(fname, to_fname, 'TODO', nil) local bug_link = string.format('(report docs bug...)', bug_url) local footer = ([[ ]]):format( os.date('%Y-%m-%d %H:%M:%S'), commit, #stats.parse_errors, bug_link, html_esc(table.concat(stats.noise_lines, '\n')), #stats.noise_lines) html = ('%s%s%s
\n%s\n\n'):format( html, main, toc, footer) vim.cmd('q!') lang_tree:destroy() return html, stats end local function gen_css(fname) local css = [[ @media (min-width: 40em) { .toc { position: fixed; left: 67%; } } .toc { /* max-width: 12rem; */ } .toc > div { text-overflow: ellipsis; overflow: hidden; white-space: nowrap; } html { scroll-behavior: auto; } h1, h2, h3, h4 { font-family: sans-serif; } .help-body { padding-bottom: 2em; } .help-line { /* font-family: ui-monospace,SFMono-Regular,SF Mono,Menlo,Consolas,Liberation Mono,monospace; */ } .help-item { display: list-item; margin-left: 1.5rem; /* padding-left: 1rem; */ } .help-para { padding-top: 10px; padding-bottom: 10px; } .old-help-line { /* Tabs are used for alignment in old docs, so we must match Vim's 8-char expectation. */ tab-size: 8; white-space: pre; font-size: .875em; font-family: ui-monospace,SFMono-Regular,SF Mono,Menlo,Consolas,Liberation Mono,monospace; } a.help-tag, a.help-tag:focus, a.help-tag:hover { color: inherit; text-decoration: none; } .help-tag { color: gray; } h1 .help-tag, h2 .help-tag { font-size: smaller; } .help-heading { overflow: hidden; white-space: nowrap; display: flex; justify-content: space-between; } /* The (right-aligned) "tags" part of a section heading. */ .help-heading-tags { margin-left: 10px; } .parse-error { background-color: red; } .unknown-token { color: black; background-color: yellow; } pre { /* Tabs are used in code_blocks only for indentation, not alignment, so we can aggressively shrink them. 
*/ tab-size: 2; white-space: pre; overflow: visible; /* font-family: ui-monospace,SFMono-Regular,SF Mono,Menlo,Consolas,Liberation Mono,monospace; */ /* font-size: 14px; */ /* border: 0px; */ /* margin: 0px; */ } pre:hover, .help-heading:hover { overflow: visible; } .generator-stats { color: gray; font-size: smaller; } .golden-grid { display: grid; grid-template-columns: 65% auto; grid-gap: 1em; } ]] tofile(fname, css) end function M._test() tagmap = get_helptags('./build/runtime/doc') helpfiles = get_helpfiles() local function ok(cond, expected, actual) assert((not expected and not actual) or (expected and actual), 'if "expected" is given, "actual" is also required') if expected then return assert(cond, ('expected %s, got: %s'):format(vim.inspect(expected), vim.inspect(actual))) else return assert(cond) end end local function eq(expected, actual) return ok(expected == actual, expected, actual) end eq(119, #helpfiles) ok(vim.tbl_count(tagmap) > 3000, '>3000', vim.tbl_count(tagmap)) ok(vim.endswith(tagmap['vim.diagnostic.set()'], 'diagnostic.txt'), tagmap['vim.diagnostic.set()'], 'diagnostic.txt') ok(vim.endswith(tagmap['%:s'], 'cmdline.txt'), tagmap['%:s'], 'cmdline.txt') ok(is_noise([[vim:tw=78:isk=!-~,^*,^\|,^\":ts=8:noet:ft=help:norl:]])) ok(is_noise([[ VIM REFERENCE MANUAL by Abe Lincoln ]])) ok(not is_noise([[vim:tw=78]])) eq(0, get_indent('a')) eq(1, get_indent(' a')) eq(2, get_indent(' a\n b\n c\n')) eq(5, get_indent(' a\n \n b\n c\n d\n e\n')) eq('a\n \n b\n c\n d\n e\n', trim_indent(' a\n \n b\n c\n d\n e\n')) print('all tests passed') end --- Generates HTML from :help docs located in `help_dir` and writes the result in `to_dir`. --- --- Example: --- --- gen('./build/runtime/doc', '/path/to/neovim.github.io/_site/doc/', {'api.txt', 'autocmd.txt', 'channel.txt'}, nil) --- --- @param help_dir string Source directory containing the :help files. Must run `make helptags` first. --- @param to_dir string Target directory where the .html files will be written. 
--- @param include table|nil Process only these filenames. Example: {'api.txt', 'autocmd.txt', 'channel.txt'}
---
--- @returns info dict
function M.gen(help_dir, to_dir, include)
  vim.validate{
    help_dir={help_dir, function(d) return vim.fn.isdirectory(d) == 1 end, 'valid directory'},
    to_dir={to_dir, 's'},
    include={include, 't', true},
  }

  local err_count = 0
  tagmap = get_helptags(help_dir)
  -- NOTE(review): get_helpfiles() lists a fixed directory and is not passed `help_dir`;
  -- only the tag map is built from `help_dir`.
  helpfiles = get_helpfiles(include)

  print(('output dir: %s'):format(to_dir))
  vim.fn.mkdir(to_dir, 'p')
  gen_css(('%s/help.css'):format(to_dir))

  for _, f in ipairs(helpfiles) do
    local helpfile = vim.fs.basename(f)
    local to_fname = ('%s/%s'):format(to_dir, get_helppage(helpfile))
    -- Files not listed in `new_layout` are rendered with the preformatted ("old") layout.
    local html, stats = gen_one(f, to_fname, not new_layout[helpfile])
    tofile(to_fname, html)
    print(('generated (%-4s errors): %-15s => %s'):format(#stats.parse_errors, helpfile, vim.fs.basename(to_fname)))
    err_count = err_count + #stats.parse_errors
  end
  print(('generated %d html pages'):format(#helpfiles))
  print(('total errors: %d'):format(err_count))
  print(('invalid tags:\n%s'):format(vim.inspect(invalid_tags)))

  return {
    helpfiles = helpfiles,
    err_count = err_count,
    invalid_tags = invalid_tags,
  }
end

-- Validates all :help files found in `help_dir`:
--  - checks that |tag| links point to valid helptags.
--  - recursively counts parse errors ("ERROR" nodes)
--
-- This is 10x faster than gen(), for use in CI.
--
-- @param help_dir string|nil Directory whose tags are loaded. Defaults to
--                 './build/runtime/doc' so that the no-argument validate() call shown in
--                 the usage notes at the top of this file works.
-- @param include table|nil Process only these filenames.
--
-- @returns results dict: { helpfiles, err_count, invalid_tags }
function M.validate(help_dir, include)
  -- Without a default, the validator below rejects a nil help_dir.
  help_dir = help_dir or './build/runtime/doc'
  vim.validate{
    help_dir={help_dir, function(d) return vim.fn.isdirectory(d) == 1 end, 'valid directory'},
    include={include, 't', true},
  }

  local err_count = 0
  tagmap = get_helptags(help_dir)
  helpfiles = get_helpfiles(include)

  for _, f in ipairs(helpfiles) do
    local helpfile = vim.fs.basename(f)
    local rv = validate_one(f)
    print(('validated (%-4s errors): %s'):format(#rv.parse_errors, helpfile))
    err_count = err_count + #rv.parse_errors
  end

  return {
    helpfiles = helpfiles,
    err_count = err_count,
    invalid_tags = invalid_tags,
  }
end

return M