diff options
author | Lewis Russell <lewis6991@gmail.com> | 2023-03-08 11:03:11 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-03-08 11:03:11 +0000 |
commit | ddd257f75301a50c177fc24a693d39a45b47a689 (patch) | |
tree | 052f09c521d467c31919ba6b67b085cf4bb234d3 | |
parent | 06aed7c1776e9db769c77ce836c1995128a6afc6 (diff) | |
download | rneovim-ddd257f75301a50c177fc24a693d39a45b47a689.tar.gz rneovim-ddd257f75301a50c177fc24a693d39a45b47a689.tar.bz2 rneovim-ddd257f75301a50c177fc24a693d39a45b47a689.zip |
feat(treesitter): use upstream format for injection queries
-rw-r--r-- | runtime/doc/news.txt | 4 | ||||
-rw-r--r-- | runtime/doc/treesitter.txt | 46 | ||||
-rw-r--r-- | runtime/lua/vim/treesitter/_meta.lua | 8 | ||||
-rw-r--r-- | runtime/lua/vim/treesitter/languagetree.lua | 240 | ||||
-rw-r--r-- | runtime/lua/vim/treesitter/query.lua | 21 | ||||
-rw-r--r-- | runtime/queries/c/injections.scm | 6 | ||||
-rw-r--r-- | runtime/queries/help/injections.scm | 7 | ||||
-rw-r--r-- | runtime/queries/lua/injections.scm | 16 | ||||
-rw-r--r-- | runtime/queries/vim/injections.scm | 46 |
9 files changed, 283 insertions, 111 deletions
diff --git a/runtime/doc/news.txt b/runtime/doc/news.txt index f5b9f39d93..04ae9360a0 100644 --- a/runtime/doc/news.txt +++ b/runtime/doc/news.txt @@ -196,6 +196,10 @@ The following new APIs or features were added. • Added an omnifunc implementation for lua, |vim.lua_omnifunc()| +• Treesitter injection queries now use the format described at + https://tree-sitter.github.io/tree-sitter/syntax-highlighting#language-injection . + Support for the previous format will be removed in a future release. + ============================================================================== CHANGED FEATURES *news-changes* diff --git a/runtime/doc/treesitter.txt b/runtime/doc/treesitter.txt index 1f78e4d5d9..3b0936941f 100644 --- a/runtime/doc/treesitter.txt +++ b/runtime/doc/treesitter.txt @@ -441,7 +441,53 @@ individual query pattern manually by setting its `"priority"` metadata attribute: > (super_important_node) @ImportantHighlight (#set! "priority" 105) + +============================================================================== +TREESITTER LANGUAGE INJECTIONS *treesitter-language-injections* < + +Note: the following information is adapted from: + https://tree-sitter.github.io/tree-sitter/syntax-highlighting#language-injection + +Some source files contain code written in multiple different languages. +Examples include: + + • HTML files, which can contain JavaScript inside of `<script>` tags and + CSS inside of `<style>` tags + • ERB files, which contain Ruby inside of `<%` `%>` tags, and HTML outside of + those tags + • PHP files, which can contain HTML between the `<?php` tags + • JavaScript files, which contain regular expression syntax within regex + literals + • Ruby, which can contain snippets of code inside of heredoc literals, + where the heredoc delimiter often indicates the language + • Lua, which can contain snippets of Vimscript inside |vim.cmd()| calls. + • Vimscript, which can contain snippets of Lua inside |:lua-heredoc| + blocks. 
+ +All of these examples can be modeled in terms of a parent syntax tree and one +or more injected syntax trees, which reside inside of certain nodes in the +parent tree. The language injection query allows you to specify these +“injections” using the following captures: + + • `@injection.content` - indicates that the captured node should have its + contents re-parsed using another language. + • `@injection.language` - indicates that the captured node’s text may + contain the name of a language that should be used to re-parse the + `@injection.content`. + +The language injection behavior can also be configured by some properties +associated with patterns: + + • `injection.language` - can be used to hard-code the name of a specific + language. + • `injection.combined` - indicates that all of the matching nodes in the + tree should have their content parsed as one nested document. + • `injection.include-children` - indicates that the `@injection.content` + node's entire text should be re-parsed, including the text of its child + nodes. By default, child nodes' text will be excluded from the injected + document. 
+ ============================================================================== VIM.TREESITTER *lua-treesitter* diff --git a/runtime/lua/vim/treesitter/_meta.lua b/runtime/lua/vim/treesitter/_meta.lua index 731a5ebf9f..ad0854706b 100644 --- a/runtime/lua/vim/treesitter/_meta.lua +++ b/runtime/lua/vim/treesitter/_meta.lua @@ -14,7 +14,7 @@ ---@field child_count fun(self: TSNode): integer ---@field named_child_count fun(self: TSNode): integer ---@field child fun(self: TSNode, integer): TSNode ----@field name_child fun(self: TSNode, integer): TSNode +---@field named_child fun(self: TSNode, integer): TSNode ---@field descendant_for_range fun(self: TSNode, integer, integer, integer, integer): TSNode ---@field named_descendant_for_range fun(self: TSNode, integer, integer, integer, integer): TSNode ---@field parent fun(self: TSNode): TSNode @@ -43,10 +43,10 @@ function TSNode:_rawquery(query, captures, start, end_) end function TSNode:_rawquery(query, captures, start, end_) end ---@class TSParser ----@field parse fun(self: TSParser, tree, source: integer|string): TSTree, integer[] +---@field parse fun(self: TSParser, tree, source: integer|string): TSTree, Range4[] ---@field reset fun(self: TSParser) ----@field included_ranges fun(self: TSParser): integer[] ----@field set_included_ranges fun(self: TSParser, ranges: integer[][]) +---@field included_ranges fun(self: TSParser): Range4[] +---@field set_included_ranges fun(self: TSParser, ranges: Range6[]) ---@field set_timeout fun(self: TSParser, timeout: integer) ---@field timeout fun(self: TSParser): integer diff --git a/runtime/lua/vim/treesitter/languagetree.lua b/runtime/lua/vim/treesitter/languagetree.lua index b8b0dd867e..fbc602486b 100644 --- a/runtime/lua/vim/treesitter/languagetree.lua +++ b/runtime/lua/vim/treesitter/languagetree.lua @@ -399,6 +399,169 @@ local function get_range_from_metadata(node, id, metadata) return { node:range() } end +---@private +--- TODO(lewis6991): cleanup of the node_range interface 
+---@param node TSNode +---@param id integer +---@param metadata TSMetadata +---@return Range4[] +local function get_node_ranges(node, id, metadata, include_children) + local range = get_range_from_metadata(node, id, metadata) + + if include_children then + return { range } + end + + local ranges = {} ---@type Range4[] + + local srow, scol, erow, ecol = range[1], range[2], range[3], range[4] + + for i = 0, node:named_child_count() - 1 do + local child = node:named_child(i) + local child_srow, child_scol, child_erow, child_ecol = child:range() + if child_srow > srow or child_scol > scol then + table.insert(ranges, { srow, scol, child_srow, child_scol }) + end + srow = child_erow + scol = child_ecol + end + + if erow > srow or ecol > scol then + table.insert(ranges, { srow, scol, erow, ecol }) + end + + return ranges +end + +---@alias TSInjection table<string,table<integer,table>> + +---@private +---@param t table<integer,TSInjection> +---@param tree_index integer +---@param pattern integer +---@param lang string +---@param combined boolean +---@param ranges Range4[] +local function add_injection(t, tree_index, pattern, lang, combined, ranges) + assert(type(lang) == 'string') + + -- Each tree index should be isolated from the other nodes. + if not t[tree_index] then + t[tree_index] = {} + end + + if not t[tree_index][lang] then + t[tree_index][lang] = {} + end + + -- Key this by pattern. If combined is set to true all captures of this pattern + -- will be parsed by treesitter as the same "source". + -- If combined is false, each "region" will be parsed as a single source. + if not t[tree_index][lang][pattern] then + t[tree_index][lang][pattern] = { combined = combined, regions = {} } + end + + table.insert(t[tree_index][lang][pattern].regions, ranges) +end + +---@private +---Get node text +--- +---Note: `query.get_node_text` returns string|string[]|nil so use this simple alias function +---to annotate it returns string. 
+--- +---TODO(lewis6991): use [at]overload annotations on `query.get_node_text` +---@param node TSNode +---@param source integer|string +---@param metadata table +---@return string +local function get_node_text(node, source, metadata) + return query.get_node_text(node, source, { metadata = metadata }) --[[@as string]] +end + +---@private +--- Extract injections according to: +--- https://tree-sitter.github.io/tree-sitter/syntax-highlighting#language-injection +---@param match table<integer,TSNode> +---@param metadata table +---@return string, boolean, Range4[] +function LanguageTree:_get_injection(match, metadata) + local ranges = {} ---@type Range4[] + local combined = metadata['injection.combined'] ~= nil + local lang = metadata['injection.language'] ---@type string + local include_children = metadata['injection.include-children'] ~= nil + + for id, node in pairs(match) do + local name = self._injection_query.captures[id] + + -- Lang should override any other language tag + if name == 'injection.language' then + lang = get_node_text(node, self._source, metadata[id]) + elseif name == 'injection.content' then + ranges = get_node_ranges(node, id, metadata, include_children) + end + end + + return lang, combined, ranges +end + +---@private +---@param match table<integer,TSNode> +---@param metadata table +---@return string, boolean, Range4[] +function LanguageTree:_get_injection_deprecated(match, metadata) + local lang = nil ---@type string + local ranges = {} ---@type Range4[] + local combined = metadata.combined ~= nil + + -- Directives can configure how injections are captured as well as actual node captures. + -- This allows more advanced processing for determining ranges and language resolution. 
+ if metadata.content then + local content = metadata.content ---@type any + + -- Allow for captured nodes to be used + if type(content) == 'number' then + content = { match[content]:range() } + end + + if type(content) == 'table' and #content >= 4 then + vim.list_extend(ranges, content) + end + end + + if metadata.language then + lang = metadata.language ---@type string + end + + -- You can specify the content and language together + -- using a tag with the language, for example + -- @javascript + for id, node in pairs(match) do + local name = self._injection_query.captures[id] + + -- Lang should override any other language tag + if name == 'language' and not lang then + lang = get_node_text(node, self._source, metadata[id]) + elseif name == 'combined' then + combined = true + elseif name == 'content' and #ranges == 0 then + table.insert(ranges, get_range_from_metadata(node, id, metadata)) + -- Ignore any tags that start with "_" + -- Allows for other tags to be used in matches + elseif string.sub(name, 1, 1) ~= '_' then + if not lang then + lang = name + end + + if #ranges == 0 then + table.insert(ranges, get_range_from_metadata(node, id, metadata)) + end + end + end + + return lang, combined, ranges +end + --- Gets language injection points by language. --- --- This is where most of the injection processing occurs. @@ -406,13 +569,13 @@ end --- TODO: Allow for an offset predicate to tailor the injection range --- instead of using the entire nodes range. 
---@private ----@return table<string, integer[][]> +---@return table<string, Range4[][]> function LanguageTree:_get_injections() if not self._injection_query then return {} end - ---@type table<integer,table<string,table<integer,table>>> + ---@type table<integer,TSInjection> local injections = {} for tree_index, tree in ipairs(self._trees) do @@ -422,75 +585,12 @@ function LanguageTree:_get_injections() for pattern, match, metadata in self._injection_query:iter_matches(root_node, self._source, start_line, end_line + 1) do - local lang = nil ---@type string - local ranges = {} ---@type Range4[] - local combined = metadata.combined ---@type boolean - - -- Directives can configure how injections are captured as well as actual node captures. - -- This allows more advanced processing for determining ranges and language resolution. - if metadata.content then - local content = metadata.content ---@type any - - -- Allow for captured nodes to be used - if type(content) == 'number' then - content = { match[content]:range() } - end - - if type(content) == 'table' and #content >= 4 then - vim.list_extend(ranges, content) - end - end - - if metadata.language then - lang = metadata.language ---@type string - end - - -- You can specify the content and language together - -- using a tag with the language, for example - -- @javascript - for id, node in pairs(match) do - local name = self._injection_query.captures[id] - - -- Lang should override any other language tag - if name == 'language' and not lang then - ---@diagnostic disable-next-line - lang = query.get_node_text(node, self._source, { metadata = metadata[id] }) - elseif name == 'combined' then - combined = true - elseif name == 'content' and #ranges == 0 then - table.insert(ranges, get_range_from_metadata(node, id, metadata)) - -- Ignore any tags that start with "_" - -- Allows for other tags to be used in matches - elseif string.sub(name, 1, 1) ~= '_' then - if not lang then - lang = name - end - - if #ranges == 0 then - 
table.insert(ranges, get_range_from_metadata(node, id, metadata)) - end - end + local lang, combined, ranges = self:_get_injection(match, metadata) + if not lang then + -- TODO(lewis6991): remove after 0.9 (#20434) + lang, combined, ranges = self:_get_injection_deprecated(match, metadata) end - - assert(type(lang) == 'string') - - -- Each tree index should be isolated from the other nodes. - if not injections[tree_index] then - injections[tree_index] = {} - end - - if not injections[tree_index][lang] then - injections[tree_index][lang] = {} - end - - -- Key this by pattern. If combined is set to true all captures of this pattern - -- will be parsed by treesitter as the same "source". - -- If combined is false, each "region" will be parsed as a single source. - if not injections[tree_index][lang][pattern] then - injections[tree_index][lang][pattern] = { combined = combined, regions = {} } - end - - table.insert(injections[tree_index][lang][pattern].regions, ranges) + add_injection(injections, tree_index, pattern, lang, combined, ranges) end end diff --git a/runtime/lua/vim/treesitter/query.lua b/runtime/lua/vim/treesitter/query.lua index 22f706585e..59894cc7f5 100644 --- a/runtime/lua/vim/treesitter/query.lua +++ b/runtime/lua/vim/treesitter/query.lua @@ -407,7 +407,7 @@ predicate_handlers['vim-match?'] = predicate_handlers['match?'] ---@field [string] integer|string ---@field range Range4 ----@alias TSDirective fun(match: TSMatch, _, _, predicate: any[], metadata: TSMetadata) +---@alias TSDirective fun(match: TSMatch, _, _, predicate: (string|integer)[], metadata: TSMetadata) -- Predicate handler receive the following arguments -- (match, pattern, bufnr, predicate) @@ -419,24 +419,17 @@ predicate_handlers['vim-match?'] = predicate_handlers['match?'] ---@type table<string,TSDirective> local directive_handlers = { ['set!'] = function(_, _, _, pred, metadata) - if #pred == 4 then - -- (#set! 
@capture "key" "value") - ---@diagnostic disable-next-line:no-unknown - local _, capture_id, key, value = unpack(pred) - ---@cast value integer|string - ---@cast capture_id integer - ---@cast key string + if #pred >= 3 and type(pred[2]) == 'number' then + -- (#set! @capture key value) + local capture_id, key, value = pred[2], pred[3], pred[4] if not metadata[capture_id] then metadata[capture_id] = {} end metadata[capture_id][key] = value else - ---@diagnostic disable-next-line:no-unknown - local _, key, value = unpack(pred) - ---@cast value integer|string - ---@cast key string - -- (#set! "key" "value") - metadata[key] = value + -- (#set! key value) + local key, value = pred[2], pred[3] + metadata[key] = value or true end end, -- Shifts the range of a node. diff --git a/runtime/queries/c/injections.scm b/runtime/queries/c/injections.scm index 7e9e73449d..84ae33683e 100644 --- a/runtime/queries/c/injections.scm +++ b/runtime/queries/c/injections.scm @@ -1,3 +1,5 @@ -(preproc_arg) @c +((preproc_arg) @injection.content + (#set! injection.language "c")) -; (comment) @comment +; ((comment) @injection.content +; (#set! injection.language "comment")) diff --git a/runtime/queries/help/injections.scm b/runtime/queries/help/injections.scm index 09bbe44e84..260a05d863 100644 --- a/runtime/queries/help/injections.scm +++ b/runtime/queries/help/injections.scm @@ -1,3 +1,4 @@ -(codeblock - (language) @language - (code) @content) +((codeblock + (language) @injection.language + (code) @injection.content) + (#set! injection.include-children)) diff --git a/runtime/queries/lua/injections.scm b/runtime/queries/lua/injections.scm index 69acbbbe9f..10eb4c4054 100644 --- a/runtime/queries/lua/injections.scm +++ b/runtime/queries/lua/injections.scm @@ -3,20 +3,26 @@ (identifier) @_cdef_identifier (_ _ (identifier) @_cdef_identifier) ] - arguments: (arguments (string content: _ @c))) + arguments: (arguments (string content: _ @injection.content))) + (#set! injection.language "c") (#eq? 
@_cdef_identifier "cdef")) ((function_call name: (_) @_vimcmd_identifier - arguments: (arguments (string content: _ @vim))) + arguments: (arguments (string content: _ @injection.content))) + (#set! injection.language "vim") (#any-of? @_vimcmd_identifier "vim.cmd" "vim.api.nvim_command" "vim.api.nvim_exec" "vim.api.nvim_cmd")) ((function_call name: (_) @_vimcmd_identifier - arguments: (arguments (string content: _ @query) .)) + arguments: (arguments (string content: _ @injection.content) .)) + (#set! injection.language "query") (#eq? @_vimcmd_identifier "vim.treesitter.query.set_query")) ; ;; highlight string as query if starts with `;; query` -; ((string ("string_content") @query) (#lua-match? @query "^%s*;+%s?query")) +; ((string ("string_content") @injection.content) +; (#set! injection.language "query") +; (#lua-match? @injection.content "^%s*;+%s?query")) -; (comment) @comment +; ((comment) @injection.content +; (#set! injection.language "comment")) diff --git a/runtime/queries/vim/injections.scm b/runtime/queries/vim/injections.scm index b53643dc1a..50f0190112 100644 --- a/runtime/queries/vim/injections.scm +++ b/runtime/queries/vim/injections.scm @@ -1,18 +1,33 @@ -(lua_statement (script (body) @lua)) -(lua_statement (chunk) @lua) -(ruby_statement (script (body) @ruby)) -(ruby_statement (chunk) @ruby) -(python_statement (script (body) @python)) -(python_statement (chunk) @python) +((lua_statement (script (body) @injection.content)) + (#set! injection.language "lua")) + +((lua_statement (chunk) @injection.content) + (#set! injection.language "lua")) + +((ruby_statement (script (body) @injection.content)) + (#set! injection.language "ruby")) + +((ruby_statement (chunk) @injection.content) + (#set! injection.language "ruby")) + +((python_statement (script (body) @injection.content)) + (#set! injection.language "python")) + +((python_statement (chunk) @injection.content) + (#set! injection.language "python")) + ;; If we support perl at some point... 
-;; (perl_statement (script (body) @perl)) -;; (perl_statement (chunk) @perl) +;; ((perl_statement (script (body) @injection.content)) +;; (#set! injection.language "perl")) +;; ((perl_statement (chunk) @injection.content) +;; (#set! injection.language "perl")) -(autocmd_statement (pattern) @regex) +((autocmd_statement (pattern) @injection.content) + (#set! injection.language "regex")) ((set_item option: (option_name) @_option - value: (set_value) @vim) + value: (set_value) @injection.content) (#any-of? @_option "includeexpr" "inex" "printexpr" "pexpr" @@ -22,7 +37,12 @@ "foldexpr" "fde" "diffexpr" "dex" "patchexpr" "pex" - "charconvert" "ccv")) + "charconvert" "ccv") + (#set! injection.language "vim")) + + +; ((comment) @injection.content +; (#set! injection.language "comment")) -; (comment) @comment -; (line_continuation_comment) @comment +; ((line_continuation_comment) @injection.content +; (#set! injection.language "comment")) |