gen_vimdoc.py: better handling of inline (non-block) nodes

author: Justin M. Keyes <justinkz@gmail.com> 2019-12-24 00:04:14 -0800
committer: Justin M. Keyes <justinkz@gmail.com> 2019-12-30 03:12:24 -0800
commit: b81547ce6d182e2a7518e5a2f80d1ee90c5cdb8f (patch)
tree: 12541b3c0931533db8f00ad8e98523c2bd329dc4 /scripts/gen_vimdoc.py
parent: 27b678f577eb422ccc67e803727c3b26756787f3 (diff)
download: rneovim-b81547ce6d182e2a7518e5a2f80d1ee90c5cdb8f.tar.gz
rneovim-b81547ce6d182e2a7518e5a2f80d1ee90c5cdb8f.tar.bz2
rneovim-b81547ce6d182e2a7518e5a2f80d1ee90c5cdb8f.zip
1 files changed, 141 insertions, 117 deletions
diff --git a/scripts/gen_vimdoc.py b/scripts/gen_vimdoc.py
index 93851ed17e..891336f571 100755
--- a/scripts/gen_vimdoc.py
+++ b/scripts/gen_vimdoc.py
@@ -1,18 +1,18 @@
 #!/usr/bin/env python3
-"""Generates Nvim help docs from C/Lua docstrings, using Doxygen.
+"""Generates Nvim :help docs from C/Lua docstrings, using Doxygen.
 
 Also generates *.mpack files. To inspect the *.mpack structure:
 
-    :new | put=json_encode(msgpackparse(readfile('runtime/doc/api.mpack')))
+    :new | put=v:lua.vim.inspect(msgpackparse(readfile('runtime/doc/api.mpack')))
+
 
 Flow:
-    gen_docs
+    main
       extract_from_xml
-        fmt_node_as_vimhelp
-          fmt_params_map_as_vimhelp
-            render_node
-          para_as_map
-            render_node
+        fmt_node_as_vimhelp   \
+          para_as_map          } recursive
+            update_params_map /
+              render_node
 
 This would be easier using lxml and XSLT, but:
 
@@ -58,6 +58,7 @@ DEBUG = ('DEBUG' in os.environ)
 INCLUDE_C_DECL = ('INCLUDE_C_DECL' in os.environ)
 INCLUDE_DEPRECATED = ('INCLUDE_DEPRECATED' in os.environ)
 
+fmt_vimhelp = False  # HACK
 text_width = 78
 script_path = os.path.abspath(__file__)
 base_dir = os.path.dirname(os.path.dirname(script_path))
@@ -144,6 +145,7 @@ def debug_this(cond, o):
         except Exception:
             pass
     if ((callable(cond) and cond())
+            or (not callable(cond) and cond == True)
             or (not callable(cond) and cond in o)):
         raise RuntimeError('xxx: {}\n{}'.format(name, o))
 
@@ -156,20 +158,27 @@ def find_first(parent, name):
     return sub[0]
 
 
-def get_children(parent, name):
-    """Yield matching child nodes within parent."""
+def iter_children(parent, name):
+    """Yields matching child nodes within parent."""
     for child in parent.childNodes:
         if child.nodeType == child.ELEMENT_NODE and child.nodeName == name:
             yield child
 
 
 def get_child(parent, name):
-    """Get the first matching child node."""
-    for child in get_children(parent, name):
+    """Gets the first matching child node."""
+    for child in iter_children(parent, name):
         return child
     return None
 
 
+def self_or_child(n):
+    """Gets the first child node, or self."""
+    if len(n.childNodes) == 0:
+        return n
+    return n.childNodes[0]
+
+
 def clean_text(text):
     """Cleans text.
 
@@ -190,18 +199,21 @@ def is_blank(text):
     return '' == clean_lines(text)
 
 
-def get_text(parent, preformatted=False):
-    """Combine all text in a node."""
-    if parent.nodeType == parent.TEXT_NODE:
-        return parent.data
-
-    out = ''
-    for node in parent.childNodes:
+def get_text(n, preformatted=False):
+    """Recursively concatenates all text in a node tree."""
+    text = ''
+    if n.nodeType == n.TEXT_NODE:
+        return n.data
+    if n.nodeName == 'computeroutput':
+        for node in n.childNodes:
+            text += get_text(node)
+        return '`{}` '.format(text)
+    for node in n.childNodes:
         if node.nodeType == node.TEXT_NODE:
-            out += node.data if preformatted else clean_text(node.data)
+            text += node.data if preformatted else clean_text(node.data)
         elif node.nodeType == node.ELEMENT_NODE:
-            out += ' ' + get_text(node, preformatted)
-    return out
+            text += ' ' + get_text(node, preformatted)
+    return text
 
 
 # Gets the length of the last line in `text`, excluding newline ("\n") char.
@@ -221,6 +233,8 @@ def len_lastline_withoutindent(text, indent):
 
 # Returns True if node `n` contains only inline (not block-level) elements.
 def is_inline(n):
+    # if len(n.childNodes) == 0:
+    #     return n.nodeType == n.TEXT_NODE or n.nodeName == 'computeroutput'
     for c in n.childNodes:
         if c.nodeType != c.TEXT_NODE and c.nodeName != 'computeroutput':
             return False
@@ -271,11 +285,17 @@ def doc_wrap(text, prefix='', width=70, func=False, indent=None):
     return result
 
 
+def max_name(names):
+    if len(names) == 0:
+        return 0
+    return max(len(name) for name in names)
+
+
 def update_params_map(parent, ret_map, width=62):
     """Updates `ret_map` with name:desc key-value pairs extracted
     from Doxygen XML node `parent`.
     """
-    params = []
+    params = collections.OrderedDict()
     for node in parent.childNodes:
         if node.nodeType == node.TEXT_NODE:
             continue
@@ -285,48 +305,34 @@ def update_params_map(parent, ret_map, width=62):
         name = get_text(name_node)
         if name in param_exclude:
             continue
-        params.append((name.strip(), node))
+        params[name.strip()] = node
+    max_name_len = max_name(params.keys()) + 8
     # `ret_map` is a name:desc map.
-    for name, node in params:
+    for name, node in params.items():
         desc = ''
         desc_node = get_child(node, 'parameterdescription')
         if desc_node:
-            desc = fmt_node_as_vimhelp(desc_node, width=width, indent=(" " * len(name)))
+            desc = fmt_node_as_vimhelp(desc_node, width=width,
+                                  indent=(' ' * max_name_len))
             ret_map[name] = desc
     return ret_map
 
 
-def fmt_params_map_as_vimhelp(m, width=62):
-    """Renders a params map as Vim :help text."""
-    max_name_len = 0
-    for name, desc in m.items():
-        max_name_len = max(max_name_len, len(name) + 4)
-    out = ''
-    for name, desc in m.items():
-        name = '    {}'.format('{{{}}}'.format(name).ljust(max_name_len))
-        out += '{}{}\n'.format(name, desc)
-    return out.rstrip()
-
-
 def render_node(n, text, prefix='', indent='', width=62):
     """Renders a node as Vim help text, recursively traversing all descendants."""
+    global fmt_vimhelp
+    def ind(s):
+        return s if fmt_vimhelp else ''
     text = ''
     # space_preceding = (len(text) > 0 and ' ' == text[-1][-1])
     # text += (int(not space_preceding) * ' ')
 
-    if n.nodeType == n.TEXT_NODE:
-        # `prefix` is NOT sent to doc_wrap, it was already handled by now.
-        text += doc_wrap(n.data, indent=indent, width=width)
-    elif n.nodeName == 'computeroutput':
-        text += ' `{}` '.format(get_text(n))
-    elif n.nodeName == 'preformatted':
+    if n.nodeName == 'preformatted':
         o = get_text(n, preformatted=True)
         ensure_nl = '' if o[-1] == '\n' else '\n'
-        text += ' >{}{}\n<'.format(ensure_nl, o)
+        text += '>{}{}\n<'.format(ensure_nl, o)
     elif is_inline(n):
-        for c in n.childNodes:
-            text += render_node(c, text)
-        text = doc_wrap(text, indent=indent, width=width)
+        text = doc_wrap(get_text(n), indent=indent, width=width)
     elif n.nodeName == 'verbatim':
         # TODO: currently we don't use this. The "[verbatim]" hint is there as
         # a reminder that we must decide how to format this if we do use it.
@@ -341,8 +347,6 @@ def render_node(n, text, prefix='', indent='', width=62):
     elif n.nodeName in ('para', 'heading'):
         for c in n.childNodes:
             text += render_node(c, text, indent=indent, width=width)
-        if is_inline(n):
-            text = doc_wrap(text, indent=indent, width=width)
     elif n.nodeName == 'itemizedlist':
         for c in n.childNodes:
             text += '{}\n'.format(render_node(c, text, prefix='• ',
@@ -368,7 +372,7 @@ def render_node(n, text, prefix='', indent='', width=62):
         text += '\n'
     elif (n.nodeName == 'simplesect'
             and n.getAttribute('kind') in ('return', 'see')):
-        text += '    '
+        text += ind('    ')
         for c in n.childNodes:
             text += render_node(c, text, indent='    ', width=width)
     else:
@@ -395,11 +399,6 @@ def para_as_map(parent, indent='', width=62):
         'xrefs': []
     }
 
-    if is_inline(parent):
-        chunks["text"] = clean_lines(
-            doc_wrap(render_node(parent, ""), indent=indent, width=width).strip()
-        )
-
     # Ordered dict of ordered lists.
     groups = collections.OrderedDict([
         ('params', []),
@@ -413,27 +412,39 @@ def para_as_map(parent, indent='', width=62):
     text = ''
     kind = ''
     last = ''
-    for child in parent.childNodes:
-        if child.nodeName == 'parameterlist':
-            groups['params'].append(child)
-        elif child.nodeName == 'xrefsect':
-            groups['xrefs'].append(child)
-        elif child.nodeName == 'simplesect':
-            last = kind
-            kind = child.getAttribute('kind')
-            if kind == 'return' or (kind == 'note' and last == 'return'):
-                groups['return'].append(child)
-            elif kind == 'see':
-                groups['seealso'].append(child)
-            elif kind in ('note', 'warning'):
-                text += render_node(child, text, indent=indent, width=width)
+    if is_inline(parent):
+        # Flatten inline text from a tree of non-block nodes.
+        text = doc_wrap(render_node(parent, ""), indent=indent, width=width)
+    else:
+        prev = None  # Previous node
+        for child in parent.childNodes:
+            if child.nodeName == 'parameterlist':
+                groups['params'].append(child)
+            elif child.nodeName == 'xrefsect':
+                groups['xrefs'].append(child)
+            elif child.nodeName == 'simplesect':
+                last = kind
+                kind = child.getAttribute('kind')
+                if kind == 'return' or (kind == 'note' and last == 'return'):
+                    groups['return'].append(child)
+                elif kind == 'see':
+                    groups['seealso'].append(child)
+                elif kind in ('note', 'warning'):
+                    text += render_node(child, text, indent=indent, width=width)
+                else:
+                    raise RuntimeError('unhandled simplesect: {}\n{}'.format(
+                        child.nodeName, child.toprettyxml(indent='  ', newl='\n')))
             else:
-                raise RuntimeError('unhandled simplesect: {}\n{}'.format(
-                    child.nodeName, child.toprettyxml(indent='  ', newl='\n')))
-        else:
-            text += render_node(child, text, indent=indent, width=width)
+                if (prev is not None
+                        and is_inline(self_or_child(prev))
+                        and is_inline(self_or_child(child))
+                        and '' != get_text(self_or_child(child)).strip()
+                        and ' ' != text[-1]):
+                    text += ' '
+                text += render_node(child, text, indent=indent, width=width)
+                prev = child
 
-    chunks['text'] = text
+    chunks['text'] += text
 
     # Generate map from the gathered items.
     if len(groups['params']) > 0:
@@ -441,7 +452,7 @@ def para_as_map(parent, indent='', width=62):
             update_params_map(child, ret_map=chunks['params'], width=width)
     for child in groups['return']:
         chunks['return'].append(render_node(
-            child, '', indent=indent, width=width).lstrip())
+            child, '', indent=indent, width=width))
     for child in groups['seealso']:
         chunks['seealso'].append(render_node(
             child, '', indent=indent, width=width))
@@ -463,20 +474,30 @@ def fmt_node_as_vimhelp(parent, width=62, indent=''):
     NB: Blank lines in a docstring manifest as <para> tags.
     """
     rendered_blocks = []
+
+    def fmt_param_doc(m):
+        """Renders a params map as Vim :help text."""
+        max_name_len = max_name(m.keys()) + 4
+        out = ''
+        for name, desc in m.items():
+            name = '    {}'.format('{{{}}}'.format(name).ljust(max_name_len))
+            out += '{}{}\n'.format(name, desc)
+        return out.rstrip()
+
+    def has_nonexcluded_params(m):
+        """Returns true if any of the given params has at least
+        one non-excluded item."""
+        if fmt_param_doc(m) != '':
+            return True
+
     for child in parent.childNodes:
         para = para_as_map(child, indent, width)
 
-        def has_nonexcluded_params(m):
-            """Returns true if any of the given params has at least
-            one non-excluded item."""
-            if fmt_params_map_as_vimhelp(m) != '':
-                return True
-
         # Generate text from the gathered items.
         chunks = [para['text']]
         if len(para['params']) > 0 and has_nonexcluded_params(para['params']):
             chunks.append('\nParameters: ~')
-            chunks.append(fmt_params_map_as_vimhelp(para['params'], width=width))
+            chunks.append(fmt_param_doc(para['params']))
         if len(para['return']) > 0:
             chunks.append('\nReturn: ~')
             for s in para['return']:
@@ -493,20 +514,21 @@ def fmt_node_as_vimhelp(parent, width=62, indent=''):
     return clean_lines('\n'.join(rendered_blocks).strip())
 
 
-def extract_from_xml(filename, mode, fmt_vimhelp):
+def extract_from_xml(filename, mode, width):
     """Extracts Doxygen info as maps without formatting the text.
 
     Returns two maps:
       1. Functions
       2. Deprecated functions
 
-    The `fmt_vimhelp` parameter controls some special cases for use by
+    The `fmt_vimhelp` global controls some special cases for use by
     fmt_doxygen_xml_as_vimhelp(). (TODO: ugly :)
     """
     global xrefs
+    global fmt_vimhelp
     xrefs.clear()
-    functions = {}  # Map of func_name:docstring.
-    deprecated_functions = {}  # Map of func_name:docstring.
+    fns = {}  # Map of func_name:docstring.
+    deprecated_fns = {}  # Map of func_name:docstring.
 
     dom = minidom.parse(filename)
     compoundname = get_text(dom.getElementsByTagName('compoundname')[0])
@@ -553,7 +575,7 @@ def extract_from_xml(filename, mode, fmt_vimhelp):
         params = []
         type_length = 0
 
-        for param in get_children(member, 'param'):
+        for param in iter_children(member, 'param'):
             param_type = get_text(get_child(param, 'type')).strip()
             param_name = ''
             declname = get_child(param, 'declname')
@@ -590,15 +612,15 @@ def extract_from_xml(filename, mode, fmt_vimhelp):
                                      '    ')
 
             # Minimum 8 chars between signature and vimtag
-            lhs = (text_width - 8) - len(prefix)
+            lhs = (width - 8) - len(prefix)
 
             if len(prefix) + len(suffix) > lhs:
-                signature = vimtag.rjust(text_width) + '\n'
-                signature += doc_wrap(suffix, width=text_width-8, prefix=prefix,
+                signature = vimtag.rjust(width) + '\n'
+                signature += doc_wrap(suffix, width=width-8, prefix=prefix,
                                       func=True)
             else:
                 signature = prefix + suffix
-                signature += vimtag.rjust(text_width - len(signature))
+                signature += vimtag.rjust(width - len(signature))
 
         paras = []
         desc = find_first(member, 'detaileddescription')
@@ -638,25 +660,27 @@ def extract_from_xml(filename, mode, fmt_vimhelp):
             fn['c_decl'] = c_decl
 
         if 'Deprecated' in str(xrefs):
-            deprecated_functions[name] = fn
+            deprecated_fns[name] = fn
         elif name.startswith(CONFIG[mode]['func_name_prefix']):
-            functions[name] = fn
+            fns[name] = fn
 
         xrefs.clear()
 
-    return (functions, deprecated_functions)
+    return (fns, deprecated_fns)
 
 
 def fmt_doxygen_xml_as_vimhelp(filename, mode):
-    """Formats functions from doxygen XML into Vim :help format.
+    """Entrypoint for generating Vim :help from from Doxygen XML.
 
-    Returns two strings:
-      1. Functions in Vim :help format
-      2. Deprecated functions (handled by caller, or ignored)
+    Returns 3 items:
+      1. Vim help text for functions found in `filename`.
+      2. Vim help text for deprecated functions.
     """
-    functions = {}  # Map of func_name:docstring.
-    deprecated_functions = {}  # Map of func_name:docstring.
-    fns, deprecated_fns = extract_from_xml(filename, mode, True)
+    global fmt_vimhelp
+    fmt_vimhelp = True
+    fns_txt = {}  # Map of func_name:vim-help-text.
+    deprecated_fns_txt = {}  # Map of func_name:vim-help-text.
+    fns, _ = extract_from_xml(filename, mode, width=text_width)
 
     for name, fn in fns.items():
         # Generate Vim :help for parameters.
@@ -685,15 +709,15 @@ def fmt_doxygen_xml_as_vimhelp(filename, mode):
         func_doc = re.sub(r'^\s+([<>])$', r'\1', func_doc, flags=re.M)
 
         if 'Deprecated' in xrefs:
-            deprecated_functions.append(func_doc)
+            deprecated_fns_txt[name] = func_doc
         elif name.startswith(CONFIG[mode]['func_name_prefix']):
-            functions[name] = func_doc
+            fns_txt[name] = func_doc
 
         xrefs.clear()
 
-    return ('\n\n'.join(list(functions.values())),
-            '\n\n'.join(deprecated_fns),
-            functions)
+    fmt_vimhelp = False
+    return ('\n\n'.join(list(fns_txt.values())),
+            '\n\n'.join(list(deprecated_fns_txt.values())))
 
 
 def delete_lines_below(filename, tokenstr):
@@ -710,14 +734,15 @@ def delete_lines_below(filename, tokenstr):
         fp.writelines(lines[0:i])
 
 
-def gen_docs(config):
-    """Generate formatted Vim :help docs and unformatted *.mpack files for use
-    by API clients.
+def main(config):
+    """Generates:
+
+    1. Vim :help docs
+    2. *.mpack files for use by API clients
 
     Doxygen is called and configured through stdin.
     """
     for mode in CONFIG:
-        functions = {}  # Map of func_name:docstring.
         mpack_file = os.path.join(
             base_dir, 'runtime', 'doc',
             CONFIG[mode]['filename'].replace('.txt', '.mpack'))
@@ -766,14 +791,13 @@ def gen_docs(config):
 
             filename = get_text(find_first(compound, 'name'))
             if filename.endswith('.c') or filename.endswith('.lua'):
+                # Extract unformatted (*.mpack).
                 fn_map, _ = extract_from_xml(os.path.join(base, '{}.xml'.format(
-                    compound.getAttribute('refid'))), mode, False)
-
-                functions_text, deprecated_text, fns = fmt_doxygen_xml_as_vimhelp(
+                    compound.getAttribute('refid'))), mode, width=9999)
+                # Extract formatted (:help).
+                functions_text, deprecated_text = fmt_doxygen_xml_as_vimhelp(
                     os.path.join(base, '{}.xml'.format(
                                  compound.getAttribute('refid'))), mode)
-                # Collect functions from all modules (for the current `mode`).
-                functions = {**functions, **fns}
 
                 if not functions_text and not deprecated_text:
                     continue
@@ -900,6 +924,6 @@ if __name__ == "__main__":
     if len(sys.argv) > 1:
         filter_source(sys.argv[1])
     else:
-        gen_docs(Doxyfile)
+        main(Doxyfile)
 
 # vim: set ft=python ts=4 sw=4 tw=79 et :
author	Justin M. Keyes <justinkz@gmail.com>	2019-12-24 00:04:14 -0800
committer	Justin M. Keyes <justinkz@gmail.com>	2019-12-30 03:12:24 -0800
commit	b81547ce6d182e2a7518e5a2f80d1ee90c5cdb8f (patch)
tree	12541b3c0931533db8f00ad8e98523c2bd329dc4 /scripts/gen_vimdoc.py
parent	27b678f577eb422ccc67e803727c3b26756787f3 (diff)
download	rneovim-b81547ce6d182e2a7518e5a2f80d1ee90c5cdb8f.tar.gz rneovim-b81547ce6d182e2a7518e5a2f80d1ee90c5cdb8f.tar.bz2 rneovim-b81547ce6d182e2a7518e5a2f80d1ee90c5cdb8f.zip