diff options
author | Lewis Russell <lewis6991@gmail.com> | 2024-02-15 17:16:04 +0000 |
---|---|---|
committer | Lewis Russell <me@lewisr.dev> | 2024-02-27 14:41:17 +0000 |
commit | 9beb40a4db5613601fc1a4b828a44e5977eca046 (patch) | |
tree | 314096d28ccdf2a2b035091783baa35193887d6a /scripts/luacats_parser.lua | |
parent | 7ad2e3c64562bfb0ea2f7be305e4b0e6d2474d64 (diff) | |
download | rneovim-9beb40a4db5613601fc1a4b828a44e5977eca046.tar.gz rneovim-9beb40a4db5613601fc1a4b828a44e5977eca046.tar.bz2 rneovim-9beb40a4db5613601fc1a4b828a44e5977eca046.zip |
feat(docs): replace lua2dox.lua
Problem:
The documentation flow (`gen_vimdoc.py`) has several issues:
- it's not very versatile
- depends on doxygen
- doesn't work well with Lua code as it requires an awkward filter script to convert it into pseudo-C.
- The intermediate XML files and filters makes it too much like a rube goldberg machine.
Solution:
Re-implement the flow using Lua, LPEG and treesitter.
- `gen_vimdoc.py` is now replaced with `gen_vimdoc.lua` and replicates a portion of the logic.
- `lua2dox.lua` is gone!
- No more XML files.
- Doxygen is now longer used and instead we now use:
- LPEG for comment parsing (see `scripts/luacats_grammar.lua` and `scripts/cdoc_grammar.lua`).
- LPEG for C parsing (see `scripts/cdoc_parser.lua`)
- Lua patterns for Lua parsing (see `scripts/luacats_parser.lua`).
- Treesitter for Markdown parsing (see `scripts/text_utils.lua`).
- The generated `runtime/doc/*.mpack` files have been removed.
- `scripts/gen_eval_files.lua` now instead uses `scripts/cdoc_parser.lua` directly.
- Text wrapping is implemented in `scripts/text_utils.lua` and appears to produce more consistent results (the main contributer to the diff of this change).
Diffstat (limited to 'scripts/luacats_parser.lua')
-rw-r--r-- | scripts/luacats_parser.lua | 521 |
1 files changed, 521 insertions, 0 deletions
diff --git a/scripts/luacats_parser.lua b/scripts/luacats_parser.lua new file mode 100644 index 0000000000..520272d1dc --- /dev/null +++ b/scripts/luacats_parser.lua @@ -0,0 +1,521 @@ +local luacats_grammar = require('scripts.luacats_grammar') + +--- @class nvim.luacats.parser.param +--- @field name string +--- @field type string +--- @field desc string + +--- @class nvim.luacats.parser.return +--- @field name string +--- @field type string +--- @field desc string + +--- @class nvim.luacats.parser.note +--- @field desc string + +--- @class nvim.luacats.parser.brief +--- @field kind 'brief' +--- @field desc string + +--- @class nvim.luacats.parser.alias +--- @field kind 'alias' +--- @field type string +--- @field desc string + +--- @class nvim.luacats.parser.fun +--- @field name string +--- @field params nvim.luacats.parser.param[] +--- @field returns nvim.luacats.parser.return[] +--- @field desc string +--- @field access? 'private'|'package'|'protected' +--- @field class? string +--- @field module? string +--- @field modvar? string +--- @field classvar? string +--- @field deprecated? true +--- @field since? string +--- @field attrs? string[] +--- @field nodoc? true +--- @field generics? table<string,string> +--- @field table? true +--- @field notes? nvim.luacats.parser.note[] +--- @field see? nvim.luacats.parser.note[] + +--- @class nvim.luacats.parser.field +--- @field name string +--- @field type string +--- @field desc string +--- @field access? 'private'|'package'|'protected' + +--- @class nvim.luacats.parser.class +--- @field kind 'class' +--- @field name string +--- @field desc string +--- @field fields nvim.luacats.parser.field[] +--- @field notes? string[] + +--- @class nvim.luacats.parser.State +--- @field doc_lines? string[] +--- @field cur_obj? nvim.luacats.parser.obj +--- @field last_doc_item? nvim.luacats.parser.param|nvim.luacats.parser.return|nvim.luacats.parser.note +--- @field last_doc_item_indent? integer + +--- @alias nvim.luacats.parser.obj +--- | nvim.luacats.parser.class +--- | nvim.luacats.parser.fun +--- | nvim.luacats.parser.brief + +-- Remove this when we document classes properly +--- Some doc lines have the form: +--- param name some.complex.type (table) description +--- if so then transform the line to remove the complex type: +--- param name (table) description +--- @param line string +local function use_type_alt(line) + for _, type in ipairs({ 'table', 'function' }) do + line = line:gsub('@param%s+([a-zA-Z_?]+)%s+.*%((' .. type .. ')%)', '@param %1 %2') + line = line:gsub('@param%s+([a-zA-Z_?]+)%s+.*%((' .. type .. '|nil)%)', '@param %1 %2') + line = line:gsub('@param%s+([a-zA-Z_?]+)%s+.*%((' .. type .. '%?)%)', '@param %1 %2') + + line = line:gsub('@return%s+.*%((' .. type .. ')%)', '@return %1') + line = line:gsub('@return%s+.*%((' .. type .. '|nil)%)', '@return %1') + line = line:gsub('@return%s+.*%((' .. type .. '%?)%)', '@return %1') + end + return line +end + +--- If we collected any `---` lines. Add them to the existing (or new) object +--- Used for function/class descriptions and multiline param descriptions. +--- @param state nvim.luacats.parser.State +local function add_doc_lines_to_obj(state) + if state.doc_lines then + state.cur_obj = state.cur_obj or {} + local cur_obj = assert(state.cur_obj) + local txt = table.concat(state.doc_lines, '\n') + if cur_obj.desc then + cur_obj.desc = cur_obj.desc .. '\n' .. txt + else + cur_obj.desc = txt + end + state.doc_lines = nil + end +end + +--- @param line string +--- @param state nvim.luacats.parser.State +local function process_doc_line(line, state) + line = line:sub(4):gsub('^%s+@', '@') + line = use_type_alt(line) + + local parsed = luacats_grammar:match(line) + + if not parsed then + if line:match('^ ') then + line = line:sub(2) + end + + if state.last_doc_item then + if not state.last_doc_item_indent then + state.last_doc_item_indent = #line:match('^%s*') + 1 + end + state.last_doc_item.desc = (state.last_doc_item.desc or '') + .. '\n' + .. line:sub(state.last_doc_item_indent or 1) + else + state.doc_lines = state.doc_lines or {} + table.insert(state.doc_lines, line) + end + return + end + + state.last_doc_item_indent = nil + state.last_doc_item = nil + state.cur_obj = state.cur_obj or {} + local cur_obj = assert(state.cur_obj) + + local kind = parsed.kind + + if kind == 'brief' then + state.cur_obj = { + kind = 'brief', + desc = parsed.desc, + } + elseif kind == 'class' then + --- @cast parsed nvim.luacats.Class + state.cur_obj = { + kind = 'class', + name = parsed.name, + parent = parsed.parent, + desc = '', + fields = {}, + } + elseif kind == 'field' then + --- @cast parsed nvim.luacats.Field + if not parsed.access then + parsed.desc = parsed.desc or state.doc_lines and table.concat(state.doc_lines, '\n') or nil + if parsed.desc then + parsed.desc = vim.trim(parsed.desc) + end + table.insert(cur_obj.fields, parsed) + end + state.doc_lines = nil + elseif kind == 'param' then + state.last_doc_item_indent = nil + cur_obj.params = cur_obj.params or {} + if vim.endswith(parsed.name, '?') then + parsed.name = parsed.name:sub(1, -2) + parsed.type = parsed.type .. '?' + end + state.last_doc_item = { + name = parsed.name, + type = parsed.type, + desc = parsed.desc, + } + table.insert(cur_obj.params, state.last_doc_item) + elseif kind == 'return' then + cur_obj.returns = cur_obj.returns or {} + for _, t in ipairs(parsed) do + table.insert(cur_obj.returns, { + name = t.name, + type = t.type, + desc = parsed.desc, + }) + end + state.last_doc_item_indent = nil + state.last_doc_item = cur_obj.returns[#cur_obj.returns] + elseif kind == 'private' then + cur_obj.access = 'private' + elseif kind == 'package' then + cur_obj.access = 'package' + elseif kind == 'protected' then + cur_obj.access = 'protected' + elseif kind == 'deprecated' then + cur_obj.deprecated = true + elseif kind == 'nodoc' then + cur_obj.nodoc = true + elseif kind == 'since' then + cur_obj.since = parsed.desc + elseif kind == 'see' then + cur_obj.see = cur_obj.see or {} + table.insert(cur_obj.see, { desc = parsed.desc }) + elseif kind == 'note' then + state.last_doc_item_indent = nil + state.last_doc_item = { + desc = parsed.desc, + } + cur_obj.notes = cur_obj.notes or {} + table.insert(cur_obj.notes, state.last_doc_item) + elseif kind == 'type' then + cur_obj.desc = parsed.desc + parsed.desc = nil + parsed.kind = nil + cur_obj.type = parsed + elseif kind == 'alias' then + state.cur_obj = { + kind = 'alias', + desc = parsed.desc, + } + elseif kind == 'enum' then + -- TODO + state.doc_lines = nil + elseif + vim.tbl_contains({ + 'diagnostic', + 'cast', + 'overload', + 'meta', + }, kind) + then + -- Ignore + return + elseif kind == 'generic' then + cur_obj.generics = cur_obj.generics or {} + cur_obj.generics[parsed.name] = parsed.type or 'any' + else + error('Unhandled' .. vim.inspect(parsed)) + end +end + +--- @param fun nvim.luacats.parser.fun +--- @return nvim.luacats.parser.field +local function fun2field(fun) + local parts = { 'fun(' } + for _, p in ipairs(fun.params or {}) do + parts[#parts + 1] = string.format('%s: %s', p.name, p.type) + end + parts[#parts + 1] = ')' + if fun.returns then + parts[#parts + 1] = ': ' + local tys = {} --- @type string[] + for _, p in ipairs(fun.returns) do + tys[#tys + 1] = p.type + end + parts[#parts + 1] = table.concat(tys, ', ') + end + + return { + name = fun.name, + type = table.concat(parts, ''), + access = fun.access, + desc = fun.desc, + } +end + +--- Function to normalize known form for declaring functions and normalize into a more standard +--- form. +--- @param line string +--- @return string +local function filter_decl(line) + -- M.fun = vim._memoize(function(...) + -- -> + -- function M.fun(...) + line = line:gsub('^local (.+) = .*_memoize%([^,]+, function%((.*)%)$', 'local function %1(%2)') + line = line:gsub('^(.+) = .*_memoize%([^,]+, function%((.*)%)$', 'function %1(%2)') + return line +end + +--- @param line string +--- @param state nvim.luacats.parser.State +--- @param classes table<string,nvim.luacats.parser.class> +--- @param classvars table<string,string> +--- @param has_indent boolean +local function process_lua_line(line, state, classes, classvars, has_indent) + line = filter_decl(line) + + if state.cur_obj and state.cur_obj.kind == 'class' then + local nm = line:match('^local%s+([a-zA-Z0-9_]+)%s*=') + if nm then + classvars[nm] = state.cur_obj.name + end + return + end + + do + local parent_tbl, sep, fun_or_meth_nm = + line:match('^function%s+([a-zA-Z0-9_]+)([.:])([a-zA-Z0-9_]+)%s*%(') + if parent_tbl then + -- Have a decl. Ensure cur_obj + state.cur_obj = state.cur_obj or {} + local cur_obj = assert(state.cur_obj) + + -- Match `Class:foo` methods for defined classes + local class = classvars[parent_tbl] + if class then + --- @cast cur_obj nvim.luacats.parser.fun + cur_obj.name = fun_or_meth_nm + cur_obj.class = class + cur_obj.classvar = parent_tbl + -- Add self param to methods + if sep == ':' then + cur_obj.params = cur_obj.params or {} + table.insert(cur_obj.params, 1, { + name = 'self', + type = class, + }) + end + + -- Add method as the field to the class + table.insert(classes[class].fields, fun2field(cur_obj)) + return + end + + -- Match `M.foo` + if cur_obj and parent_tbl == cur_obj.modvar then + cur_obj.name = fun_or_meth_nm + return + end + end + end + + do + -- Handle: `function A.B.C.foo(...)` + local fn_nm = line:match('^function%s+([.a-zA-Z0-9_]+)%s*%(') + if fn_nm then + state.cur_obj = state.cur_obj or {} + state.cur_obj.name = fn_nm + return + end + end + + do + -- Handle: `M.foo = {...}` where `M` is the modvar + local parent_tbl, tbl_nm = line:match('([a-zA-Z_]+)%.([a-zA-Z0-9_]+)%s*=') + if state.cur_obj and parent_tbl and parent_tbl == state.cur_obj.modvar then + state.cur_obj.name = tbl_nm + state.cur_obj.table = true + return + end + end + + do + -- Handle: `foo = {...}` + local tbl_nm = line:match('^([a-zA-Z0-9_]+)%s*=') + if tbl_nm and not has_indent then + state.cur_obj = state.cur_obj or {} + state.cur_obj.name = tbl_nm + state.cur_obj.table = true + return + end + end + + do + -- Handle: `vim.foo = {...}` + local tbl_nm = line:match('^(vim%.[a-zA-Z0-9_]+)%s*=') + if state.cur_obj and tbl_nm and not has_indent then + state.cur_obj.name = tbl_nm + state.cur_obj.table = true + return + end + end + + if state.cur_obj then + if line:find('^%s*%-%- luacheck:') then + state.cur_obj = nil + elseif line:find('^%s*local%s+') then + state.cur_obj = nil + elseif line:find('^%s*return%s+') then + state.cur_obj = nil + elseif line:find('^%s*[a-zA-Z_.]+%(%s+') then + state.cur_obj = nil + end + end +end + +--- Determine the table name used to export functions of a module +--- Usually this is `M`. +--- @param filename string +--- @return string? +local function determine_modvar(filename) + local modvar --- @type string? + for line in io.lines(filename) do + do + --- @type string? + local m = line:match('^return%s+([a-zA-Z_]+)') + if m then + modvar = m + end + end + do + --- @type string? + local m = line:match('^return%s+setmetatable%(([a-zA-Z_]+),') + if m then + modvar = m + end + end + end + return modvar +end + +--- @param obj nvim.luacats.parser.obj +--- @param funs nvim.luacats.parser.fun[] +--- @param classes table<string,nvim.luacats.parser.class> +--- @param briefs string[] +--- @param uncommitted nvim.luacats.parser.obj[] +local function commit_obj(obj, classes, funs, briefs, uncommitted) + local commit = false + if obj.kind == 'class' then + --- @cast obj nvim.luacats.parser.class + if not classes[obj.name] then + classes[obj.name] = obj + commit = true + end + elseif obj.kind == 'alias' then + -- Just pretend + commit = true + elseif obj.kind == 'brief' then + --- @cast obj nvim.luacats.parser.brief` + briefs[#briefs + 1] = obj.desc + commit = true + else + --- @cast obj nvim.luacats.parser.fun` + if obj.name then + funs[#funs + 1] = obj + commit = true + end + end + if not commit then + table.insert(uncommitted, obj) + end + return commit +end + +--- @param filename string +--- @param uncommitted nvim.luacats.parser.obj[] +-- luacheck: no unused +local function dump_uncommitted(filename, uncommitted) + local out_path = 'luacats-uncommited/' .. filename:gsub('/', '%%') .. '.txt' + if #uncommitted > 0 then + print(string.format('Could not commit %d objects in %s', #uncommitted, filename)) + vim.fn.mkdir(assert(vim.fs.dirname(out_path)), 'p') + local f = assert(io.open(out_path, 'w')) + for i, x in ipairs(uncommitted) do + f:write(i) + f:write(': ') + f:write(vim.inspect(x)) + f:write('\n') + end + f:close() + else + vim.fn.delete(out_path) + end +end + +local M = {} + +--- @param filename string +--- @return table<string,nvim.luacats.parser.class> classes +--- @return nvim.luacats.parser.fun[] funs +--- @return string[] briefs +--- @return nvim.luacats.parser.obj[] +function M.parse(filename) + local funs = {} --- @type nvim.luacats.parser.fun[] + local classes = {} --- @type table<string,nvim.luacats.parser.class> + local briefs = {} --- @type string[] + + local mod_return = determine_modvar(filename) + + --- @type string + local module = filename:match('.*/lua/([a-z_][a-z0-9_/]+)%.lua') or filename + module = module:gsub('/', '.') + + local classvars = {} --- @type table<string,string> + + local state = {} --- @type nvim.luacats.parser.State + + -- Keep track of any partial objects we don't commit + local uncommitted = {} --- @type nvim.luacats.parser.obj[] + + for line in io.lines(filename) do + local has_indent = line:match('^%s+') ~= nil + line = vim.trim(line) + if vim.startswith(line, '---') then + process_doc_line(line, state) + else + add_doc_lines_to_obj(state) + + if state.cur_obj then + state.cur_obj.modvar = mod_return + state.cur_obj.module = module + end + + process_lua_line(line, state, classes, classvars, has_indent) + + -- Commit the object + local cur_obj = state.cur_obj + if cur_obj then + if not commit_obj(cur_obj, classes, funs, briefs, uncommitted) then + --- @diagnostic disable-next-line:inject-field + cur_obj.line = line + end + end + + state = {} + end + end + + -- dump_uncommitted(filename, uncommitted) + + return classes, funs, briefs, uncommitted +end + +return M |