1 files changed, 220 insertions, 93 deletions
diff --git a/scripts/gen_api_vimdoc.py b/scripts/gen_vimdoc.py
index 515964bfe8..a62d18f02e 100755
--- a/scripts/gen_api_vimdoc.py
+++ b/scripts/gen_vimdoc.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python3
-"""Parses Doxygen XML output to generate Neovim's API documentation.
+"""Generates Nvim help docs from C docstrings, by parsing Doxygen XML.
 
 This would be easier using lxml and XSLT, but:
 
@@ -35,6 +35,8 @@ import sys
 import shutil
 import textwrap
 import subprocess
+import collections
+import pprint
 
 from xml.dom import minidom
 
@@ -42,6 +44,10 @@ if sys.version_info[0] < 3:
     print("use Python 3")
     sys.exit(1)
 
+DEBUG = ('DEBUG' in os.environ)
+INCLUDE_C_DECL = ('INCLUDE_C_DECL' in os.environ)
+INCLUDE_DEPRECATED = ('INCLUDE_DEPRECATED' in os.environ)
+
 doc_filename = 'api.txt'
 # String used to find the start of the generated part of the doc.
 section_start_token = '*api-global*'
@@ -83,6 +89,12 @@ seen_funcs = set()
 # deprecated functions.
 xrefs = set()
 
+def debug_this(s, n):
+    plain = isinstance(n, str)
+    o = n if plain else n.toprettyxml(indent='  ', newl='\n')
+    if s in o:
+        raise RuntimeError('xxx: {}\n{}'.format('' if plain else n.nodeName, o))
+
 
 # XML Parsing Utilities {{{
 def find_first(parent, name):
@@ -123,6 +135,10 @@ def clean_lines(text):
     return re.sub(r'\A\n\s*\n*|\n\s*\n*\Z', '', re.sub(r'(\n\s*\n+)+', '\n\n', text))
 
 
+def is_blank(text):
+    return not clean_lines(text)
+
+
 def get_text(parent):
     """Combine all text in a node."""
     if parent.nodeType == parent.TEXT_NODE:
@@ -137,16 +153,43 @@ def get_text(parent):
     return out
 
 
-def doc_wrap(text, prefix='', width=70, func=False):
+# Returns the length of the final line of `text`, not counting its "\n".
+def len_lastline(text):
+    end = text.rfind('\n')
+    if end < 0:
+        return len(text)
+    if end != len(text) - 1:
+        return len(text) - end - 1
+    return end - text.rfind('\n', 0, end) - 1
+
+
+def len_lastline_withoutindent(text, indent):
+    extra = len_lastline(text) - len(indent)
+    return max(extra, 0)
+
+
+# True when every descendant of `n` is inline: text or <computeroutput>.
+def is_inline(n):
+    for child in n.childNodes:
+        inline_kind = (child.nodeType == child.TEXT_NODE
+                       or child.nodeName == 'computeroutput')
+        if not (inline_kind and is_inline(child)):
+            return False
+    return True
+
+def doc_wrap(text, prefix='', width=70, func=False, indent=None):
     """Wraps text to `width`.
 
-    The first line is prefixed with `prefix`, and subsequent lines are aligned.
+    First line is prefixed with `prefix`, subsequent lines are aligned.
     If `func` is True, only wrap at commas.
     """
     if not width:
+        # return prefix + text
         return text
 
-    indent_space = ' ' * len(prefix)
+    # Whitespace used to indent all lines except the first line.
+    indent = ' ' * len(prefix) if indent is None else indent
+    indent_only = (prefix == '' and indent is not None)
 
     if func:
         lines = [prefix]
@@ -154,27 +197,37 @@ def doc_wrap(text, prefix='', width=70, func=False):
             if part[-1] not in ');':
                 part += ', '
             if len(lines[-1]) + len(part) > width:
-                lines.append(indent_space)
+                lines.append(indent)
             lines[-1] += part
         return '\n'.join(x.rstrip() for x in lines).rstrip()
 
+    # XXX: Dummy prefix to force TextWrapper() to wrap the first line.
+    if indent_only:
+        prefix = indent
+
     tw = textwrap.TextWrapper(break_long_words = False,
                               break_on_hyphens = False,
                               width=width,
                               initial_indent=prefix,
-                              subsequent_indent=indent_space)
-    return '\n'.join(tw.wrap(text.strip()))
+                              subsequent_indent=indent)
+    result = '\n'.join(tw.wrap(text.strip()))
+
+    # XXX: Remove the dummy prefix.
+    if indent_only:
+        result = result[len(indent):]
+
+    return result
 
 
-def parse_params(parent, width=62):
-    """Parse Doxygen `parameterlist`."""
+def render_params(parent, width=62):
+    """Renders Doxygen <parameterlist> tag as Vim help text."""
     name_length = 0
     items = []
-    for child in parent.childNodes:
-        if child.nodeType == child.TEXT_NODE:
+    for node in parent.childNodes:
+        if node.nodeType == node.TEXT_NODE:
             continue
 
-        name_node = find_first(child, 'parametername')
+        name_node = find_first(node, 'parametername')
         if name_node.getAttribute('direction') == 'out':
             continue
 
@@ -184,79 +237,152 @@ def parse_params(parent, width=62):
 
         name = '{%s}' % name
         name_length = max(name_length, len(name) + 2)
+        items.append((name.strip(), node))
+
+    out = ''
+    for name, node in items:
+        name = '    {}'.format(name.ljust(name_length))
 
         desc = ''
-        desc_node = get_child(child, 'parameterdescription')
+        desc_node = get_child(node, 'parameterdescription')
         if desc_node:
-            desc = parse_parblock(desc_node, width=None)
-        items.append((name.strip(), desc.strip()))
-
-    out = 'Parameters: ~\n'
-    for name, desc in items:
-        name = '    %s' % name.ljust(name_length)
-        out += doc_wrap(desc, prefix=name, width=width) + '\n'
-    return out.strip()
-
+            desc = parse_parblock(desc_node, width=width,
+                    indent=(' ' * len(name)))
+
+        out += '{}{}\n'.format(name, desc)
+    return out.rstrip()
+
+# Renders a node as Vim help text, recursively traversing all descendants.
+def render_node(n, text, prefix='', indent='', width=62):
+    """Returns the Vim help text for DOM node `n` and all of its descendants.
+
+    `text` is kept for caller compatibility but ignored (output starts empty).
+    `prefix` is a list item's bullet/number; `indent` is the whitespace given
+    to doc_wrap for continuation lines; `width` is the wrap column.
+    Raises RuntimeError for a node that has no rendering rule.
+    """
+    text = ''
+    kind = n.getAttribute('kind') if n.nodeName == 'simplesect' else ''
+    if n.nodeType == n.TEXT_NODE:
+        # `prefix` is NOT sent to doc_wrap, it was already handled by now.
+        text += doc_wrap(n.data, indent=indent, width=width)
+    elif n.nodeName == 'computeroutput':
+        text += ' `{}` '.format(get_text(n))
+    elif is_inline(n):
+        # Children use the default indent/width; the joined text is rewrapped.
+        for c in n.childNodes:
+            text += render_node(c, text)
+        text = doc_wrap(text, indent=indent, width=width)
+    elif n.nodeName == 'verbatim':
+        # TODO: currently we don't use this. The "[verbatim]" hint is there as
+        # a reminder that we must decide how to format this if we do use it.
+        text += ' [verbatim] {}'.format(get_text(n))
+    elif n.nodeName == 'listitem':
+        for c in n.childNodes:
+            text += indent + prefix + render_node(c, text, indent=indent+(' ' * len(prefix)), width=width)
+    elif n.nodeName == 'para':
+        # Only block-level paragraphs get here; inline ones matched above.
+        for c in n.childNodes:
+            text += render_node(c, text, indent=indent, width=width)
+    elif n.nodeName == 'itemizedlist':
+        for c in n.childNodes:
+            text += '{}\n'.format(render_node(c, text, prefix='- ',
+                indent=indent, width=width))
+    elif n.nodeName == 'orderedlist':
+        i = 1
+        for c in n.childNodes:
+            if is_blank(get_text(c)):
+                text += '\n'
+                continue
+            text += '{}\n'.format(render_node(c, text, prefix='{}. '.format(i),
+                indent=indent, width=width))
+            i = i + 1
+    elif kind in ('note', 'warning'):
+        # Heading line ("Note:" / "Warning:"), then the body indented by 4.
+        text += '{}:\n    '.format(kind.title())
+        for c in n.childNodes:
+            text += render_node(c, text, indent='    ', width=width)
+        text += '\n'
+    elif kind in ('return', 'see'):
+        text += '    '
+        for c in n.childNodes:
+            text += render_node(c, text, indent='    ', width=width)
+    else:
+        raise RuntimeError('unhandled node type: {}\n{}'.format(
+            n.nodeName, n.toprettyxml(indent='  ', newl='\n')))
+    return text
 
-def parse_para(parent, width=62):
-    """Parse doxygen `para` tag.
+def render_para(parent, indent='', width=62):
+    """Renders Doxygen <para> containing arbitrary nodes.
 
-    I assume <para> is a paragraph block or "a block of text".  It can contain
-    text nodes, or other tags.
+    NB: Blank lines in a docstring manifest as <para> tags.
     """
-    line = ''
-    lines = []
+    if is_inline(parent):
+        return clean_lines(doc_wrap(render_node(parent, ''),
+            indent=indent, width=width).strip())
+
+    # Nodes collected per output section (filled by the loop below).
+    groups = collections.OrderedDict([
+        ('params', []),
+        ('return', []),
+        ('seealso', []),
+        ('xrefs', []),
+    ])
+
+    # Gather nodes into groups so that e.g. "parameterlist" nodes appear
+    # together; a note whose previous simplesect was a return joins "return".
+    text = ''
+    kind = ''
+    last = ''
     for child in parent.childNodes:
-        if child.nodeType == child.TEXT_NODE:
-            line += child.data
-        elif child.nodeName == 'computeroutput':
-            line += '`%s`' % get_text(child)
-        else:
-            if line:
-                lines.append(doc_wrap(line, width=width))
-                line = ''
-
-            if child.nodeName == 'parameterlist':
-                lines.append(parse_params(child, width=width))
-            elif child.nodeName == 'xrefsect':
-                title = get_text(get_child(child, 'xreftitle'))
-                xrefs.add(title)
-                xrefdesc = parse_para(get_child(child, 'xrefdescription'))
-                lines.append(doc_wrap(xrefdesc, prefix='%s: ' % title,
-                                      width=width) + '\n')
-            elif child.nodeName == 'simplesect':
-                kind = child.getAttribute('kind')
-                if kind == 'note':
-                    lines.append('Note:')
-                    lines.append(doc_wrap(parse_para(child),
-                                          prefix='    ',
-                                          width=width))
-                elif kind == 'return':
-                    lines.append('%s: ~' % kind.title())
-                    lines.append(doc_wrap(parse_para(child),
-                                          prefix='    ',
-                                          width=width))
+        if child.nodeName == 'parameterlist':
+            groups['params'].append(child)
+        elif child.nodeName == 'xrefsect':
+            groups['xrefs'].append(child)
+        elif child.nodeName == 'simplesect':
+            last = kind
+            kind = child.getAttribute('kind')
+            if kind == 'return' or (kind == 'note' and last == 'return'):
+                groups['return'].append(child)
+            elif kind == 'see':
+                groups['seealso'].append(child)
+            elif kind in ('note', 'warning'):
+                text += render_node(child, text, indent=indent, width=width)
             else:
-                lines.append(get_text(child))
-
-    if line:
-        lines.append(doc_wrap(line, width=width))
-    return clean_lines('\n'.join(lines).strip())
-
-
-def parse_parblock(parent, width=62):
-    """Parses a nested block of `para` tags.
-
-    Named after the \parblock command, but not directly related.
-    """
+                raise RuntimeError('unhandled simplesect: {}\n{}'.format(
+                    kind, child.toprettyxml(indent='  ', newl='\n')))
+        else:
+            text += render_node(child, text, indent=indent, width=width)
+
+    chunks = [text]
+    # Generate text from the gathered items.
+    if groups['params']:
+        chunks.append('\nParameters: ~')
+        for child in groups['params']:
+            chunks.append(render_params(child, width=width))
+    if groups['return']:
+        chunks.append('\nReturn: ~')
+        for child in groups['return']:
+            chunks.append(render_node(child, '', indent=indent, width=width))
+    if groups['seealso']:
+        chunks.append('\nSee also: ~')
+        for child in groups['seealso']:
+            chunks.append(render_node(child, '', indent=indent, width=width))
+    for child in groups['xrefs']:
+        title = get_text(get_child(child, 'xreftitle'))
+        xrefs.add(title)
+        xrefdesc = render_para(get_child(child, 'xrefdescription'), width=width)
+        chunks.append(doc_wrap(xrefdesc, prefix='{}: '.format(title),
+                              width=width) + '\n')
+
+    return clean_lines('\n'.join(chunks).strip())
+
+
+def parse_parblock(parent, prefix='', width=62, indent=''):
+    """Renders nested <para> tags as Vim help text; `prefix` is unused."""
     paragraphs = []
     for child in parent.childNodes:
-        if child.nodeType == child.TEXT_NODE:
-            paragraphs.append(doc_wrap(child.data, width=width))
-        elif child.nodeName == 'para':
-            paragraphs.append(parse_para(child, width=width))
-        else:
-            paragraphs.append(doc_wrap(get_text(child), width=width))
+        paragraphs.append(render_para(child, width=width, indent=indent))
         paragraphs.append('')
     return clean_lines('\n'.join(paragraphs).strip())
 # }}}
@@ -292,7 +418,7 @@ def parse_source_xml(filename):
 
         if return_type.startswith(('ArrayOf', 'DictionaryOf')):
             parts = return_type.strip('_').split('_')
-            return_type = '%s(%s)' % (parts[0], ', '.join(parts[1:]))
+            return_type = '{}({})'.format(parts[0], ', '.join(parts[1:]))
 
         name = get_text(get_child(member, 'name'))
 
@@ -306,37 +432,37 @@ def parse_source_xml(filename):
         annotations = filter(None, map(lambda x: annotation_map.get(x),
                                        annotations.split()))
 
-        vimtag = '*%s()*' % name
-        args = []
+        vimtag = '*{}()*'.format(name)
+        params = []
         type_length = 0
 
         for param in get_children(member, 'param'):
-            arg_type = get_text(get_child(param, 'type')).strip()
-            arg_name = ''
+            param_type = get_text(get_child(param, 'type')).strip()
+            param_name = ''
             declname = get_child(param, 'declname')
             if declname:
-                arg_name = get_text(declname).strip()
+                param_name = get_text(declname).strip()
 
-            if arg_name in param_exclude:
+            if param_name in param_exclude:
                 continue
 
-            if arg_type.endswith('*'):
-                arg_type = arg_type.strip('* ')
-                arg_name = '*' + arg_name
-            type_length = max(type_length, len(arg_type))
-            args.append((arg_type, arg_name))
+            if param_type.endswith('*'):
+                param_type = param_type.strip('* ')
+                param_name = '*' + param_name
+            type_length = max(type_length, len(param_type))
+            params.append((param_type, param_name))
 
         c_args = []
-        for arg_type, arg_name in args:
+        for param_type, param_name in params:
             c_args.append('    ' + (
-                '%s %s' % (arg_type.ljust(type_length), arg_name)).strip())
+                '%s %s' % (param_type.ljust(type_length), param_name)).strip())
 
         c_decl = textwrap.indent('%s %s(\n%s\n);' % (return_type, name,
                                                      ',\n'.join(c_args)),
                                  '    ')
 
         prefix = '%s(' % name
-        suffix = '%s)' % ', '.join('{%s}' % a[1] for a in args
+        suffix = '%s)' % ', '.join('{%s}' % a[1] for a in params
                                    if a[0] not in ('void', 'Error'))
 
         # Minimum 8 chars between signature and vimtag
@@ -354,7 +480,7 @@ def parse_source_xml(filename):
         desc = find_first(member, 'detaileddescription')
         if desc:
             doc = parse_parblock(desc)
-            if 'DEBUG' in os.environ:
+            if DEBUG:
                 print(textwrap.indent(
                     re.sub(r'\n\s*\n+', '\n',
                            desc.toprettyxml(indent='  ', newl='\n')), ' ' * 16))
@@ -372,7 +498,7 @@ def parse_source_xml(filename):
             else:
                 doc = doc[:i] + annotations + '\n\n' + doc[i:]
 
-        if 'INCLUDE_C_DECL' in os.environ:
+        if INCLUDE_C_DECL:
             doc += '\n\nC Declaration: ~\n>\n'
             doc += c_decl
             doc += '\n<'
@@ -464,7 +590,7 @@ def gen_docs(config):
                 if functions:
                     doc += '\n\n' + functions
 
-                if 'INCLUDE_DEPRECATED' in os.environ and deprecated:
+                if INCLUDE_DEPRECATED and deprecated:
                     doc += '\n\n\nDeprecated %s Functions: ~\n\n' % name
                     doc += deprecated
 
@@ -551,6 +677,7 @@ XML_PROGRAMLISTING     = NO
 ENABLE_PREPROCESSING   = YES
 MACRO_EXPANSION        = YES
 EXPAND_ONLY_PREDEF     = NO
+MARKDOWN_SUPPORT       = YES
 '''
 # }}}