From 09b64d75bd92a95d89c4f39f9df7918760abe98d Mon Sep 17 00:00:00 2001 From: "Justin M. Keyes" Date: Sun, 27 Mar 2022 19:47:34 -0700 Subject: feat(docs): gen_help_html.lua MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Problem: The :help docs HTML generated is driven by an old awk script `runtime/doc/makehtml.awk` that is hard to maintain (ad hoc parser and no one has touched it in decades) and has bugs like: - https://github.com/neovim/neovim.github.io/issues/96 - https://github.com/neovim/neovim.github.io/issues/97 Solution: Use Lua + treesitter (https://github.com/vigoux/tree-sitter-vimdoc) to generate :help docs HTML. Also validates tag links. fix https://github.com/neovim/neovim.github.io/issues/96 fix https://github.com/neovim/neovim.github.io/issues/97 TODO: - delete doc_html build task - delete runtime/doc/Makefile - delete makehtml.awk - delete maketags.awk OUTPUT: $ nvim -V1 -es --clean +"lua require('scripts.gen_help_html')" output dir: /…/neovim.github.io/_site/doc/ generated (207 errors): api.txt => api.html generated (122 errors): arabic.txt => arabic.html generated (285 errors): autocmd.txt => autocmd.html generated (641 errors): builtin.txt => builtin.html generated (623 errors): change.txt => change.html generated (65 errors): channel.txt => channel.html generated (353 errors): cmdline.txt => cmdline.html generated (3 errors): debug.txt => debug.html generated (28 errors): deprecated.txt => deprecated.html generated (193 errors): dev_style.txt => dev_style.html generated (460 errors): develop.txt => develop.html generated (19 errors): diagnostic.txt => diagnostic.html generated (57 errors): diff.txt => diff.html generated (818 errors): digraph.txt => digraph.html generated (330 errors): editing.txt => editing.html generated (368 errors): eval.txt => eval.html generated (184 errors): fold.txt => fold.html generated (61 errors): ft_ada.txt => ft_ada.html generated (0 errors): ft_ps1.txt => ft_ps1.html generated (20 errors): ft_raku.txt => ft_raku.html generated (5 errors): ft_rust.txt => ft_rust.html generated (41 errors): ft_sql.txt => ft_sql.html generated (110 errors): gui.txt => gui.html generated (79 errors): hebrew.txt => hebrew.html generated (17 errors): help.txt => index.html generated (104 errors): helphelp.txt => helphelp.html generated (0 errors): if_cscop.txt => if_cscop.html generated (23 errors): if_perl.txt => if_perl.html generated (16 errors): if_pyth.txt => if_pyth.html generated (9 errors): if_ruby.txt => if_ruby.html generated (216 errors): indent.txt => indent.html generated (634 errors): index.txt => vimindex.html generated (320 errors): insert.txt => insert.html generated (265 errors): intro.txt => intro.html generated (9 errors): job_control.txt => job_control.html generated (0 errors): lsp-extension.txt => lsp-extension.html generated (214 errors): lsp.txt => lsp.html generated (311 errors): lua.txt => lua.html generated (592 errors): luaref.txt => luaref.html generated (798 errors): luvref.txt => luvref.html generated (663 errors): map.txt => map.html generated (228 errors): mbyte.txt => mbyte.html generated (228 errors): message.txt => message.html generated (0 errors): mlang.txt => mlang.html generated (761 errors): motion.txt => motion.html generated (4 errors): nvim.txt => nvim.html generated (226 errors): nvim_terminal_emulator.txt => nvim_terminal_emulator.html generated (988 errors): options.txt => options.html generated (567 errors): pattern.txt => pattern.html generated (15 errors): pi_gzip.txt => pi_gzip.html generated (10 errors): pi_health.txt => pi_health.html generated (27 errors): pi_msgpack.txt => pi_msgpack.html generated (2177 errors): pi_netrw.txt => pi_netrw.html generated (41 errors): pi_paren.txt => pi_paren.html generated (9 errors): pi_spec.txt => pi_spec.html generated (218 errors): pi_tar.txt => pi_tar.html generated (0 errors): pi_tutor.txt => pi_tutor.html generated (235 errors): pi_zip.txt => pi_zip.html generated (265 errors): print.txt => print.html generated (31 errors): provider.txt => provider.html generated (335 errors): quickfix.txt => quickfix.html generated (572 errors): quickref.txt => quickref.html generated (109 errors): recover.txt => recover.html generated (14 errors): remote.txt => remote.html generated (14 errors): remote_plugin.txt => remote_plugin.html generated (351 errors): repeat.txt => repeat.html generated (23 errors): rileft.txt => rileft.html generated (12 errors): russian.txt => russian.html generated (6 errors): scroll.txt => scroll.html generated (106 errors): sign.txt => sign.html generated (347 errors): spell.txt => spell.html generated (784 errors): starting.txt => starting.html generated (1499 errors): syntax.txt => syntax.html generated (23 errors): tabpage.txt => tabpage.html generated (257 errors): tagsrch.txt => tagsrch.html generated (31 errors): term.txt => term.html generated (0 errors): testing.txt => testing.html generated (96 errors): tips.txt => tips.html generated (57 errors): treesitter.txt => treesitter.html generated (71 errors): uganda.txt => uganda.html generated (74 errors): ui.txt => ui.html generated (87 errors): undo.txt => undo.html generated (17 errors): userfunc.txt => userfunc.html generated (1 errors): usr_01.txt => usr_01.html generated (89 errors): usr_02.txt => usr_02.html generated (293 errors): usr_03.txt => usr_03.html generated (46 errors): usr_04.txt => usr_04.html generated (96 errors): usr_05.txt => usr_05.html generated (54 errors): usr_06.txt => usr_06.html generated (20 errors): usr_07.txt => usr_07.html generated (241 errors): usr_08.txt => usr_08.html generated (130 errors): usr_09.txt => usr_09.html generated (50 errors): usr_10.txt => usr_10.html generated (33 errors): usr_11.txt => usr_11.html generated (32 errors): usr_12.txt => usr_12.html generated (22 errors): usr_20.txt => usr_20.html generated (75 errors): usr_21.txt => usr_21.html generated (8 errors): usr_22.txt => usr_22.html generated (3 errors): usr_23.txt => usr_23.html generated (163 errors): usr_25.txt => usr_25.html generated (13 errors): usr_26.txt => usr_26.html generated (84 errors): usr_27.txt => usr_27.html generated (173 errors): usr_28.txt => usr_28.html generated (285 errors): usr_29.txt => usr_29.html generated (280 errors): usr_30.txt => usr_30.html generated (11 errors): usr_31.txt => usr_31.html generated (13 errors): usr_32.txt => usr_32.html generated (156 errors): usr_40.txt => usr_40.html generated (134 errors): usr_41.txt => usr_41.html generated (35 errors): usr_42.txt => usr_42.html generated (19 errors): usr_43.txt => usr_43.html generated (60 errors): usr_44.txt => usr_44.html generated (13 errors): usr_45.txt => usr_45.html generated (1 errors): usr_toc.txt => usr_toc.html generated (69 errors): various.txt => various.html generated (68 errors): vi_diff.txt => vi_diff.html generated (437 errors): vim_diff.txt => vim_diff.html generated (296 errors): visual.txt => visual.html generated (181 errors): windows.txt => windows.html generated 119 html pages total errors: 23862 invalid tags: 537 --- scripts/gen_help_html.py | 389 ----------------------------------------------- 1 file changed, 389 deletions(-) delete mode 100644 scripts/gen_help_html.py (limited to 'scripts/gen_help_html.py') diff --git a/scripts/gen_help_html.py b/scripts/gen_help_html.py deleted file mode 100644 index 0b8e77ac22..0000000000 --- a/scripts/gen_help_html.py +++ /dev/null @@ -1,389 +0,0 @@ -# Converts Vim/Nvim documentation to HTML. -# -# USAGE: -# 1. python3 scripts/gen_help_html.py runtime/doc/ ~/neovim.github.io/t/ -# 3. cd ~/neovim.github.io/ && jekyll serve --host 0.0.0.0 -# 2. Visit http://localhost:4000/t/help.txt.html -# -# Adapted from https://github.com/c4rlo/vimhelp/ -# License: MIT -# -# Copyright (c) 2016 Carlo Teubner -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -import os -import re -import urllib.parse -import datetime -import sys -from itertools import chain - -HEAD = """\ - - - - - -Nvim: {filename} -""" - -HEAD_END = '\n\n' - -INTRO = """ -

Nvim help files

-

-Nvim help pages{vers-note}. -Updated automatically -from the Nvim source. -

-""" - -VERSION_NOTE = ", current as of Nvim {version}" - -SITENAVI_LINKS = """ -Quick reference · -User manual · -Reference manual · -""" - -SITENAVI_LINKS_PLAIN = SITENAVI_LINKS.format(helptxt='help.txt.html') -SITENAVI_LINKS_WEB = SITENAVI_LINKS.format(helptxt='/') - -SITENAVI_PLAIN = '

' + SITENAVI_LINKS_PLAIN + '

' -SITENAVI_WEB = '

' + SITENAVI_LINKS_WEB + '

' - -SITENAVI_SEARCH = '
' + SITENAVI_LINKS_WEB + \ - '
' - -TEXTSTART = """ -
-
""" + (" " * 80) + """
-
-
-"""
-
-FOOTER = '
' - -FOOTER2 = """ - -
-
- - -""".format( - generated_date='{0:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now()), - commit='?') - -RE_TAGLINE = re.compile(r'(\S+)\s+(\S+)') - -PAT_WORDCHAR = '[!#-)+-{}~\xC0-\xFF]' - -PAT_HEADER = r'(^.*~$)' -PAT_GRAPHIC = r'(^.* `$)' -PAT_PIPEWORD = r'(?|.)?)' -PAT_SPECIAL = r'(<.+?>|\{.+?}|' \ - r'\[(?:range|line|count|offset|\+?cmd|[-+]?num|\+\+opt|' \ - r'arg|arguments|ident|addr|group)]|' \ - r'(?<=\s)\[[-a-z^A-Z0-9_]{2,}])' -PAT_TITLE = r'(Vim version [0-9.a-z]+|VIM REFERENCE.*)' -PAT_NOTE = r'((? \t]+[a-zA-Z0-9/])' -PAT_WORD = r'((?$') -RE_EG_END = re.compile(r'\S') -RE_SECTION = re.compile(r'[-A-Z .][-A-Z0-9 .()]*(?=\s+\*)') -RE_STARTAG = re.compile(r'\s\*([^ \t|]+)\*(?:\s|$)') -RE_LOCAL_ADD = re.compile(r'LOCAL ADDITIONS:\s+\*local-additions\*$') - - -class Link(object): - __slots__ = 'link_plain_same', 'link_pipe_same', \ - 'link_plain_foreign', 'link_pipe_foreign', \ - 'filename' - - def __init__(self, link_plain_same, link_plain_foreign, - link_pipe_same, link_pipe_foreign, filename): - self.link_plain_same = link_plain_same - self.link_plain_foreign = link_plain_foreign - self.link_pipe_same = link_pipe_same - self.link_pipe_foreign = link_pipe_foreign - self.filename = filename - - -class VimH2H(object): - def __init__(self, tags, version=None, is_web_version=True): - self._urls = {} - self._version = version - self._is_web_version = is_web_version - for line in RE_NEWLINE.split(tags): - m = RE_TAGLINE.match(line) - if m: - tag, filename = m.group(1, 2) - self.do_add_tag(filename, tag) - - def add_tags(self, filename, contents): - for match in RE_STARTAG.finditer(contents): - tag = match.group(1).replace('\\', '\\\\').replace('/', '\\/') - self.do_add_tag(str(filename), tag) - - def do_add_tag(self, filename, tag): - tag_quoted = urllib.parse.quote_plus(tag) - - def mkpart1(doc): - return '' + html_escape[tag] + '' - - def mklinks(cssclass): - return (part1_same + cssclass + part2, - part1_foreign + cssclass + part2) - cssclass_plain = 'd' - m = RE_LINKWORD.match(tag) - if m: - opt, ctrl, special = m.groups() - if opt is not None: - cssclass_plain = 'o' - elif ctrl is not None: - cssclass_plain = 'k' - elif special is not None: - cssclass_plain = 's' - links_plain = mklinks(cssclass_plain) - links_pipe = mklinks('l') - self._urls[tag] = Link( - links_plain[0], links_plain[1], - links_pipe[0], links_pipe[1], - filename) - - def maplink(self, tag, curr_filename, css_class=None): - links = self._urls.get(tag) - if links is not None: - if links.filename == curr_filename: - if css_class == 'l': - return links.link_pipe_same - else: - return links.link_plain_same - else: - if css_class == 'l': - return links.link_pipe_foreign - else: - return links.link_plain_foreign - elif css_class is not None: - return '' + html_escape[tag] + \ - '' - else: - return html_escape[tag] - - def to_html(self, filename, contents, encoding): - out = [] - - inexample = 0 - filename = str(filename) - is_help_txt = (filename == 'help.txt') - last = '' - for line in RE_NEWLINE.split(contents): - line = line.rstrip('\r\n') - line_tabs = line - line = line.expandtabs() - if last == 'h1': - out.extend(('')) # XXX - out.extend(('

', line.rstrip(), '

\n')) - out.extend(('
'))
-                last = ''
-                continue
-            if RE_HRULE.match(line):
-                # out.extend(('', line, '\n'))
-                last = 'h1'
-                continue
-            if inexample == 2:
-                if RE_EG_END.match(line):
-                    inexample = 0
-                    if line[0] == '<':
-                        line = line[1:]
-                else:
-                    out.extend(('', html_escape[line],
-                                '\n'))
-                    continue
-            if RE_EG_START.match(line_tabs):
-                inexample = 1
-                line = line[0:-1]
-            if RE_SECTION.match(line_tabs):
-                m = RE_SECTION.match(line)
-                out.extend((r'', m.group(0), r''))
-                line = line[m.end():]
-            lastpos = 0
-            for match in RE_TAGWORD.finditer(line):
-                pos = match.start()
-                if pos > lastpos:
-                    out.append(html_escape[line[lastpos:pos]])
-                lastpos = match.end()
-                header, graphic, pipeword, starword, command, opt, ctrl, \
-                    special, title, note, url, word = match.groups()
-                if pipeword is not None:
-                    out.append(self.maplink(pipeword, filename, 'l'))
-                elif starword is not None:
-                    out.extend(('', html_escape[starword], ''))
-                elif command is not None:
-                    out.extend(('', html_escape[command],
-                                ''))
-                elif opt is not None:
-                    out.append(self.maplink(opt, filename, 'o'))
-                elif ctrl is not None:
-                    out.append(self.maplink(ctrl, filename, 'k'))
-                elif special is not None:
-                    out.append(self.maplink(special, filename, 's'))
-                elif title is not None:
-                    out.extend(('', html_escape[title],
-                                ''))
-                elif note is not None:
-                    out.extend(('', html_escape[note],
-                                ''))
-                elif header is not None:
-                    out.extend(('', html_escape[header[:-1]],
-                                ''))
-                elif graphic is not None:
-                    out.append(html_escape[graphic[:-2]])
-                elif url is not None:
-                    out.extend(('' +
-                                html_escape[url], ''))
-                elif word is not None:
-                    out.append(self.maplink(word, filename))
-            if lastpos < len(line):
-                out.append(html_escape[line[lastpos:]])
-            out.append('\n')
-            if inexample == 1:
-                inexample = 2
-
-        header = []
-        header.append(HEAD.format(encoding=encoding, filename=filename))
-        header.append(HEAD_END)
-        if self._is_web_version and is_help_txt:
-            vers_note = VERSION_NOTE.replace('{version}', self._version) \
-                if self._version else ''
-            header.append(INTRO.replace('{vers-note}', vers_note))
-        if self._is_web_version:
-            header.append(SITENAVI_SEARCH)
-            sitenavi_footer = SITENAVI_WEB
-        else:
-            header.append(SITENAVI_PLAIN)
-            sitenavi_footer = SITENAVI_PLAIN
-        header.append(TEXTSTART)
-        return ''.join(chain(header, out, (FOOTER, sitenavi_footer, FOOTER2)))
-
-
-class HtmlEscCache(dict):
-    def __missing__(self, key):
-        r = key.replace('&', '&') \
-               .replace('<', '<') \
-               .replace('>', '>')
-        self[key] = r
-        return r
-
-
-html_escape = HtmlEscCache()
-
-
-def slurp(filename):
-    try:
-        with open(filename, encoding='UTF-8') as f:
-            return f.read(), 'UTF-8'
-    except UnicodeError:
-        # 'ISO-8859-1' ?
-        with open(filename, encoding='latin-1') as f:
-            return f.read(), 'latin-1'
-
-
-def usage():
-    return "usage: " + sys.argv[0] + " IN_DIR OUT_DIR [BASENAMES...]"
-
-
-def main():
-    if len(sys.argv) < 3:
-        sys.exit(usage())
-
-    in_dir = sys.argv[1]
-    out_dir = sys.argv[2]
-    basenames = sys.argv[3:]
-
-    print("Processing tags...")
-    h2h = VimH2H(slurp(os.path.join(in_dir, 'tags'))[0], is_web_version=False)
-
-    if len(basenames) == 0:
-        basenames = os.listdir(in_dir)
-
-    for basename in basenames:
-        if os.path.splitext(basename)[1] != '.txt' and basename != 'tags':
-            print("Ignoring " + basename)
-            continue
-        print("Processing " + basename + "...")
-        path = os.path.join(in_dir, basename)
-        text, encoding = slurp(path)
-        outpath = os.path.join(out_dir, basename + '.html')
-        of = open(outpath, 'w')
-        of.write(h2h.to_html(basename, text, encoding))
-        of.close()
-
-
-main()
-- 
cgit