diff options
Diffstat (limited to 'scripts')
-rwxr-xr-x | scripts/gen_vimdoc.py (renamed from scripts/gen_api_vimdoc.py) | 313 |
1 files changed, 220 insertions, 93 deletions
diff --git a/scripts/gen_api_vimdoc.py b/scripts/gen_vimdoc.py index 515964bfe8..a62d18f02e 100755 --- a/scripts/gen_api_vimdoc.py +++ b/scripts/gen_vimdoc.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -"""Parses Doxygen XML output to generate Neovim's API documentation. +"""Generates Nvim help docs from C docstrings, by parsing Doxygen XML. This would be easier using lxml and XSLT, but: @@ -35,6 +35,8 @@ import sys import shutil import textwrap import subprocess +import collections +import pprint from xml.dom import minidom @@ -42,6 +44,10 @@ if sys.version_info[0] < 3: print("use Python 3") sys.exit(1) +DEBUG = ('DEBUG' in os.environ) +INCLUDE_C_DECL = ('INCLUDE_C_DECL' in os.environ) +INCLUDE_DEPRECATED = ('INCLUDE_DEPRECATED' in os.environ) + doc_filename = 'api.txt' # String used to find the start of the generated part of the doc. section_start_token = '*api-global*' @@ -83,6 +89,12 @@ seen_funcs = set() # deprecated functions. xrefs = set() +def debug_this(s, n): + o = n if isinstance(n, str) else n.toprettyxml(indent=' ', newl='\n') + name = '' if isinstance(n, str) else n.nodeName + if s in o: + raise RuntimeError('xxx: {}\n{}'.format(name, o)) + # XML Parsing Utilities {{{ def find_first(parent, name): @@ -123,6 +135,10 @@ def clean_lines(text): return re.sub(r'\A\n\s*\n*|\n\s*\n*\Z', '', re.sub(r'(\n\s*\n+)+', '\n\n', text)) +def is_blank(text): + return '' == clean_lines(text) + + def get_text(parent): """Combine all text in a node.""" if parent.nodeType == parent.TEXT_NODE: @@ -137,16 +153,43 @@ def get_text(parent): return out -def doc_wrap(text, prefix='', width=70, func=False): +# Gets the length of the last line in `text`, excluding newline ("\n") char. +def len_lastline(text): + lastnl = text.rfind('\n') + if -1 == lastnl: + return len(text) + if '\n' == text[-1]: + return lastnl - (1+ text.rfind('\n', 0, lastnl)) + return len(text) - (1 + lastnl) + + +def len_lastline_withoutindent(text, indent): + n = len_lastline(text) + return (n - len(indent)) if n > len(indent) else 0 + + +# Returns True if node `n` contains only inline (not block-level) elements. +def is_inline(n): + for c in n.childNodes: + if c.nodeType != c.TEXT_NODE and c.nodeName != 'computeroutput': + return False + if not is_inline(c): + return False + return True + +def doc_wrap(text, prefix='', width=70, func=False, indent=None): """Wraps text to `width`. - The first line is prefixed with `prefix`, and subsequent lines are aligned. + First line is prefixed with `prefix`, subsequent lines are aligned. If `func` is True, only wrap at commas. """ if not width: + # return prefix + text return text - indent_space = ' ' * len(prefix) + # Whitespace used to indent all lines except the first line. + indent = ' ' * len(prefix) if indent is None else indent + indent_only = (prefix == '' and indent is not None) if func: lines = [prefix] @@ -154,27 +197,37 @@ def doc_wrap(text, prefix='', width=70, func=False): if part[-1] not in ');': part += ', ' if len(lines[-1]) + len(part) > width: - lines.append(indent_space) + lines.append(indent) lines[-1] += part return '\n'.join(x.rstrip() for x in lines).rstrip() + # XXX: Dummy prefix to force TextWrapper() to wrap the first line. + if indent_only: + prefix = indent + tw = textwrap.TextWrapper(break_long_words = False, break_on_hyphens = False, width=width, initial_indent=prefix, - subsequent_indent=indent_space) - return '\n'.join(tw.wrap(text.strip())) + subsequent_indent=indent) + result = '\n'.join(tw.wrap(text.strip())) + + # XXX: Remove the dummy prefix. + if indent_only: + result = result[len(indent):] + + return result -def parse_params(parent, width=62): - """Parse Doxygen `parameterlist`.""" +def render_params(parent, width=62): + """Renders Doxygen <parameterlist> tag as Vim help text.""" name_length = 0 items = [] - for child in parent.childNodes: - if child.nodeType == child.TEXT_NODE: + for node in parent.childNodes: + if node.nodeType == node.TEXT_NODE: continue - name_node = find_first(child, 'parametername') + name_node = find_first(node, 'parametername') if name_node.getAttribute('direction') == 'out': continue @@ -184,79 +237,152 @@ def parse_params(parent, width=62): name = '{%s}' % name name_length = max(name_length, len(name) + 2) + items.append((name.strip(), node)) + + out = '' + for name, node in items: + name = ' {}'.format(name.ljust(name_length)) desc = '' - desc_node = get_child(child, 'parameterdescription') + desc_node = get_child(node, 'parameterdescription') if desc_node: - desc = parse_parblock(desc_node, width=None) - items.append((name.strip(), desc.strip())) - - out = 'Parameters: ~\n' - for name, desc in items: - name = ' %s' % name.ljust(name_length) - out += doc_wrap(desc, prefix=name, width=width) + '\n' - return out.strip() - + desc = parse_parblock(desc_node, width=width, + indent=(' ' * len(name))) + + out += '{}{}\n'.format(name, desc) + return out.rstrip() + +# Renders a node as Vim help text, recursively traversing all descendants. +def render_node(n, text, prefix='', indent='', width=62): + text = '' + # space_preceding = (len(text) > 0 and ' ' == text[-1][-1]) + # text += (int(not space_preceding) * ' ') + + if n.nodeType == n.TEXT_NODE: + # `prefix` is NOT sent to doc_wrap, it was already handled by now. + text += doc_wrap(n.data, indent=indent, width=width) + elif n.nodeName == 'computeroutput': + text += ' `{}` '.format(get_text(n)) + elif is_inline(n): + for c in n.childNodes: + text += render_node(c, text) + text = doc_wrap(text, indent=indent, width=width) + elif n.nodeName == 'verbatim': + # TODO: currently we don't use this. The "[verbatim]" hint is there as + # a reminder that we must decide how to format this if we do use it. + text += ' [verbatim] {}'.format(get_text(n)) + elif n.nodeName == 'listitem': + for c in n.childNodes: + text += indent + prefix + render_node(c, text, indent=indent+(' ' * len(prefix)), width=width) + elif n.nodeName == 'para': + for c in n.childNodes: + text += render_node(c, text, indent=indent, width=width) + if is_inline(n): + text = doc_wrap(text, indent=indent, width=width) + elif n.nodeName == 'itemizedlist': + for c in n.childNodes: + text += '{}\n'.format(render_node(c, text, prefix='- ', + indent=indent, width=width)) + elif n.nodeName == 'orderedlist': + i = 1 + for c in n.childNodes: + if is_blank(get_text(c)): + text += '\n' + continue + text += '{}\n'.format(render_node(c, text, prefix='{}. '.format(i), + indent=indent, width=width)) + i = i + 1 + elif n.nodeName == 'simplesect' and 'note' == n.getAttribute('kind'): + text += 'Note:\n ' + for c in n.childNodes: + text += render_node(c, text, indent=' ', width=width) + text += '\n' + elif n.nodeName == 'simplesect' and 'warning' == n.getAttribute('kind'): + text += 'Warning:\n ' + for c in n.childNodes: + text += render_node(c, text, indent=' ', width=width) + text += '\n' + elif (n.nodeName == 'simplesect' + and n.getAttribute('kind') in ('return', 'see')): + text += ' ' + for c in n.childNodes: + text += render_node(c, text, indent=' ', width=width) + else: + raise RuntimeError('unhandled node type: {}\n{}'.format( + n.nodeName, n.toprettyxml(indent=' ', newl='\n'))) + return text -def parse_para(parent, width=62): - """Parse doxygen `para` tag. +def render_para(parent, indent='', width=62): + """Renders Doxygen <para> containing arbitrary nodes. - I assume <para> is a paragraph block or "a block of text". It can contain - text nodes, or other tags. + NB: Blank lines in a docstring manifest as <para> tags. """ - line = '' - lines = [] + if is_inline(parent): + return clean_lines(doc_wrap(render_node(parent, ''), + indent=indent, width=width).strip()) + + # Ordered dict of ordered lists. + groups = collections.OrderedDict([ + ('params', []), + ('return', []), + ('seealso', []), + ('xrefs', []), + ]) + + # Gather nodes into groups. Mostly this is because we want "parameterlist" + # nodes to appear together. + text = '' + kind = '' + last = '' for child in parent.childNodes: - if child.nodeType == child.TEXT_NODE: - line += child.data - elif child.nodeName == 'computeroutput': - line += '`%s`' % get_text(child) - else: - if line: - lines.append(doc_wrap(line, width=width)) - line = '' - - if child.nodeName == 'parameterlist': - lines.append(parse_params(child, width=width)) - elif child.nodeName == 'xrefsect': - title = get_text(get_child(child, 'xreftitle')) - xrefs.add(title) - xrefdesc = parse_para(get_child(child, 'xrefdescription')) - lines.append(doc_wrap(xrefdesc, prefix='%s: ' % title, - width=width) + '\n') - elif child.nodeName == 'simplesect': - kind = child.getAttribute('kind') - if kind == 'note': - lines.append('Note:') - lines.append(doc_wrap(parse_para(child), - prefix=' ', - width=width)) - elif kind == 'return': - lines.append('%s: ~' % kind.title()) - lines.append(doc_wrap(parse_para(child), - prefix=' ', - width=width)) + if child.nodeName == 'parameterlist': + groups['params'].append(child) + elif child.nodeName == 'xrefsect': + groups['xrefs'].append(child) + elif child.nodeName == 'simplesect': + last = kind + kind = child.getAttribute('kind') + if kind == 'return' or (kind == 'note' and last == 'return'): + groups['return'].append(child) + elif kind == 'see': + groups['seealso'].append(child) + elif kind in ('note', 'warning'): + text += render_node(child, text, indent=indent, width=width) else: - lines.append(get_text(child)) - - if line: - lines.append(doc_wrap(line, width=width)) - return clean_lines('\n'.join(lines).strip()) - - -def parse_parblock(parent, width=62): - """Parses a nested block of `para` tags. - - Named after the \parblock command, but not directly related. - """ + raise RuntimeError('unhandled simplesect: {}\n{}'.format( + child.nodeName, child.toprettyxml(indent=' ', newl='\n'))) + else: + text += render_node(child, text, indent=indent, width=width) + + chunks = [text] + # Generate text from the gathered items. + if len(groups['params']) > 0: + chunks.append('\nParameters: ~') + for child in groups['params']: + chunks.append(render_params(child, width=width)) + if len(groups['return']) > 0: + chunks.append('\nReturn: ~') + for child in groups['return']: + chunks.append(render_node(child, chunks[-1][-1], indent=indent, width=width)) + if len(groups['seealso']) > 0: + chunks.append('\nSee also: ~') + for child in groups['seealso']: + chunks.append(render_node(child, chunks[-1][-1], indent=indent, width=width)) + for child in groups['xrefs']: + title = get_text(get_child(child, 'xreftitle')) + xrefs.add(title) + xrefdesc = render_para(get_child(child, 'xrefdescription'), width=width) + chunks.append(doc_wrap(xrefdesc, prefix='{}: '.format(title), + width=width) + '\n') + + return clean_lines('\n'.join(chunks).strip()) + + +def parse_parblock(parent, prefix='', width=62, indent=''): + """Renders a nested block of <para> tags as Vim help text.""" paragraphs = [] for child in parent.childNodes: - if child.nodeType == child.TEXT_NODE: - paragraphs.append(doc_wrap(child.data, width=width)) - elif child.nodeName == 'para': - paragraphs.append(parse_para(child, width=width)) - else: - paragraphs.append(doc_wrap(get_text(child), width=width)) + paragraphs.append(render_para(child, width=width, indent=indent)) paragraphs.append('') return clean_lines('\n'.join(paragraphs).strip()) # }}} @@ -292,7 +418,7 @@ def parse_source_xml(filename): if return_type.startswith(('ArrayOf', 'DictionaryOf')): parts = return_type.strip('_').split('_') - return_type = '%s(%s)' % (parts[0], ', '.join(parts[1:])) + return_type = '{}({})'.format(parts[0], ', '.join(parts[1:])) name = get_text(get_child(member, 'name')) @@ -306,37 +432,37 @@ def parse_source_xml(filename): annotations = filter(None, map(lambda x: annotation_map.get(x), annotations.split())) - vimtag = '*%s()*' % name - args = [] + vimtag = '*{}()*'.format(name) + params = [] type_length = 0 for param in get_children(member, 'param'): - arg_type = get_text(get_child(param, 'type')).strip() - arg_name = '' + param_type = get_text(get_child(param, 'type')).strip() + param_name = '' declname = get_child(param, 'declname') if declname: - arg_name = get_text(declname).strip() + param_name = get_text(declname).strip() - if arg_name in param_exclude: + if param_name in param_exclude: continue - if arg_type.endswith('*'): - arg_type = arg_type.strip('* ') - arg_name = '*' + arg_name - type_length = max(type_length, len(arg_type)) - args.append((arg_type, arg_name)) + if param_type.endswith('*'): + param_type = param_type.strip('* ') + param_name = '*' + param_name + type_length = max(type_length, len(param_type)) + params.append((param_type, param_name)) c_args = [] - for arg_type, arg_name in args: + for param_type, param_name in params: c_args.append(' ' + ( - '%s %s' % (arg_type.ljust(type_length), arg_name)).strip()) + '%s %s' % (param_type.ljust(type_length), param_name)).strip()) c_decl = textwrap.indent('%s %s(\n%s\n);' % (return_type, name, ',\n'.join(c_args)), ' ') prefix = '%s(' % name - suffix = '%s)' % ', '.join('{%s}' % a[1] for a in args + suffix = '%s)' % ', '.join('{%s}' % a[1] for a in params if a[0] not in ('void', 'Error')) # Minimum 8 chars between signature and vimtag @@ -354,7 +480,7 @@ def parse_source_xml(filename): desc = find_first(member, 'detaileddescription') if desc: doc = parse_parblock(desc) - if 'DEBUG' in os.environ: + if DEBUG: print(textwrap.indent( re.sub(r'\n\s*\n+', '\n', desc.toprettyxml(indent=' ', newl='\n')), ' ' * 16)) @@ -372,7 +498,7 @@ def parse_source_xml(filename): else: doc = doc[:i] + annotations + '\n\n' + doc[i:] - if 'INCLUDE_C_DECL' in os.environ: + if INCLUDE_C_DECL: doc += '\n\nC Declaration: ~\n>\n' doc += c_decl doc += '\n<' @@ -464,7 +590,7 @@ def gen_docs(config): if functions: doc += '\n\n' + functions - if 'INCLUDE_DEPRECATED' in os.environ and deprecated: + if INCLUDE_DEPRECATED and deprecated: doc += '\n\n\nDeprecated %s Functions: ~\n\n' % name doc += deprecated @@ -551,6 +677,7 @@ XML_PROGRAMLISTING = NO ENABLE_PREPROCESSING = YES MACRO_EXPANSION = YES EXPAND_ONLY_PREDEF = NO +MARKDOWN_SUPPORT = YES ''' # }}} |