aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Riley Bruins <ribru17@hotmail.com> 2024-09-19 13:08:22 -0700
committer Christian Clason <c.clason@uni-graz.at> 2024-10-11 18:15:07 +0200
commit d3193afc2559e7d84ed2d76664a650dc03b4c6ef (patch)
tree cfa3e37e11d158713ae01a691e03853509317e39
parent 267c7525f738cdd6024c39da758e885c026ffaaa (diff)
download rneovim-d3193afc2559e7d84ed2d76664a650dc03b4c6ef.tar.gz
rneovim-d3193afc2559e7d84ed2d76664a650dc03b4c6ef.tar.bz2
rneovim-d3193afc2559e7d84ed2d76664a650dc03b4c6ef.zip
fix(treesitter): remove duplicate symbol names in language.inspect()
**Problems:**
- `vim.treesitter.language.inspect()` returns duplicate symbol names, sometimes up to 6 of one kind in the case of `markdown`.
- The list-like `symbols` table can have holes and is thus not even a valid msgpack table anyway, as mentioned in a test.

**Solution:** Return symbols as a map, rather than a list, where field names are the names of the symbol. The boolean value associated with the field encodes whether or not the symbol is named. Note that anonymous nodes are surrounded with double quotes (`"`) to prevent potential collisions with named counterparts that have the same identifier.
-rw-r--r--runtime/doc/treesitter.txt9
-rw-r--r--runtime/lua/vim/treesitter/_query_linter.lua8
-rw-r--r--runtime/lua/vim/treesitter/language.lua7
-rw-r--r--src/nvim/lua/treesitter.c16
-rw-r--r--test/functional/treesitter/language_spec.lua24
-rw-r--r--test/functional/treesitter/parser_spec.lua4
6 files changed, 42 insertions, 26 deletions
diff --git a/runtime/doc/treesitter.txt b/runtime/doc/treesitter.txt
index 805876172d..f9a98250ea 100644
--- a/runtime/doc/treesitter.txt
+++ b/runtime/doc/treesitter.txt
@@ -1150,8 +1150,13 @@ get_lang({filetype}) *vim.treesitter.language.get_lang()*
inspect({lang}) *vim.treesitter.language.inspect()*
Inspects the provided language.
- Inspecting provides some useful information on the language like node
- names, ...
+ Inspecting provides some useful information on the language like node and
+ field names, ABI version, and whether the language came from a WASM
+ module.
+
+ Node names are returned in a table mapping each node name to a `boolean`
+ indicating whether or not the node is named (i.e., not anonymous).
+ Anonymous nodes are surrounded with double quotes (`"`).
Parameters: ~
• {lang} (`string`) Language
diff --git a/runtime/lua/vim/treesitter/_query_linter.lua b/runtime/lua/vim/treesitter/_query_linter.lua
index c5e4b86e1e..a825505378 100644
--- a/runtime/lua/vim/treesitter/_query_linter.lua
+++ b/runtime/lua/vim/treesitter/_query_linter.lua
@@ -240,8 +240,12 @@ function M.omnifunc(findstart, base)
table.insert(items, text)
end
end
- for _, s in pairs(parser_info.symbols) do
- local text = s[2] and s[1] or string.format('%q', s[1]):gsub('\n', 'n') ---@type string
+ for text, named in
+ pairs(parser_info.symbols --[[@as table<string, boolean>]])
+ do
+ if not named then
+ text = string.format('%q', text:sub(2, -2)):gsub('\n', 'n') ---@type string
+ end
if text:find(base, 1, true) then
table.insert(items, text)
end
diff --git a/runtime/lua/vim/treesitter/language.lua b/runtime/lua/vim/treesitter/language.lua
index 9f7807e036..aa1d38df97 100644
--- a/runtime/lua/vim/treesitter/language.lua
+++ b/runtime/lua/vim/treesitter/language.lua
@@ -170,7 +170,12 @@ end
--- Inspects the provided language.
---
---- Inspecting provides some useful information on the language like node names, ...
+--- Inspecting provides some useful information on the language like node and field names, ABI
+--- version, and whether the language came from a WASM module.
+---
+--- Node names are returned in a table mapping each node name to a `boolean` indicating whether or
+--- not the node is named (i.e., not anonymous). Anonymous nodes are surrounded with double quotes
+--- (`"`).
---
---@param lang string Language
---@return table
diff --git a/src/nvim/lua/treesitter.c b/src/nvim/lua/treesitter.c
index 819ec41390..3ceb21b61a 100644
--- a/src/nvim/lua/treesitter.c
+++ b/src/nvim/lua/treesitter.c
@@ -271,12 +271,16 @@ int tslua_inspect_lang(lua_State *L)
// not used by the API
continue;
}
- lua_createtable(L, 2, 0); // [retval, symbols, elem]
- lua_pushstring(L, ts_language_symbol_name(lang, (TSSymbol)i));
- lua_rawseti(L, -2, 1);
- lua_pushboolean(L, t == TSSymbolTypeRegular);
- lua_rawseti(L, -2, 2); // [retval, symbols, elem]
- lua_rawseti(L, -2, (int)i); // [retval, symbols]
+ const char *name = ts_language_symbol_name(lang, (TSSymbol)i);
+ bool named = t == TSSymbolTypeRegular;
+ lua_pushboolean(L, named); // [retval, symbols, is_named]
+ if (!named) {
+ char buf[256];
+ snprintf(buf, sizeof(buf), "\"%s\"", name);
+ lua_setfield(L, -2, buf); // [retval, symbols]
+ } else {
+ lua_setfield(L, -2, name); // [retval, symbols]
+ }
}
lua_setfield(L, -2, "symbols"); // [retval]
diff --git a/test/functional/treesitter/language_spec.lua b/test/functional/treesitter/language_spec.lua
index e1e34fcecc..633a2dc725 100644
--- a/test/functional/treesitter/language_spec.lua
+++ b/test/functional/treesitter/language_spec.lua
@@ -51,7 +51,7 @@ describe('treesitter language API', function()
it('inspects language', function()
local keys, fields, symbols = unpack(exec_lua(function()
local lang = vim.treesitter.language.inspect('c')
- local keys, symbols = {}, {}
+ local keys = {}
for k, v in pairs(lang) do
if type(v) == 'boolean' then
keys[k] = v
@@ -60,12 +60,7 @@ describe('treesitter language API', function()
end
end
- -- symbols array can have "holes" and is thus not a valid msgpack array
- -- but we don't care about the numbers here (checked in the parser test)
- for _, v in pairs(lang.symbols) do
- table.insert(symbols, v)
- end
- return { keys, lang.fields, symbols }
+ return { keys, lang.fields, lang.symbols }
end))
eq({ fields = true, symbols = true, _abi_version = true, _wasm = false }, keys)
@@ -79,16 +74,19 @@ describe('treesitter language API', function()
eq(true, fset['initializer'])
local has_named, has_anonymous
- for _, s in pairs(symbols) do
- eq('string', type(s[1]))
- eq('boolean', type(s[2]))
- if s[1] == 'for_statement' and s[2] == true then
+ for symbol, named in pairs(symbols) do
+ eq('string', type(symbol))
+ eq('boolean', type(named))
+ if symbol == 'for_statement' and named == true then
has_named = true
- elseif s[1] == '|=' and s[2] == false then
+ elseif symbol == '"|="' and named == false then
has_anonymous = true
end
end
- eq({ true, true }, { has_named, has_anonymous })
+ eq(
+ { has_named = true, has_anonymous = true },
+ { has_named = has_named, has_anonymous = has_anonymous }
+ )
end)
it(
diff --git a/test/functional/treesitter/parser_spec.lua b/test/functional/treesitter/parser_spec.lua
index c8829f4785..2f8d204d36 100644
--- a/test/functional/treesitter/parser_spec.lua
+++ b/test/functional/treesitter/parser_spec.lua
@@ -42,13 +42,13 @@ describe('treesitter parser API', function()
eq('function_definition', exec_lua('return child:type()'))
eq(true, exec_lua('return child:named()'))
eq('number', type(exec_lua('return child:symbol()')))
- eq({ 'function_definition', true }, exec_lua('return lang.symbols[child:symbol()]'))
+ eq(true, exec_lua('return lang.symbols[child:type()]'))
exec_lua('anon = root:descendant_for_range(0,8,0,9)')
eq('(', exec_lua('return anon:type()'))
eq(false, exec_lua('return anon:named()'))
eq('number', type(exec_lua('return anon:symbol()')))
- eq({ '(', false }, exec_lua('return lang.symbols[anon:symbol()]'))
+ eq(false, exec_lua([=[return lang.symbols[string.format('"%s"', anon:type())]]=]))
exec_lua('descendant = root:descendant_for_range(1,2,1,12)')
eq('<node declaration>', exec_lua('return tostring(descendant)'))