aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLewis Russell <lewis6991@gmail.com>2023-03-08 11:03:11 +0000
committerGitHub <noreply@github.com>2023-03-08 11:03:11 +0000
commitddd257f75301a50c177fc24a693d39a45b47a689 (patch)
tree052f09c521d467c31919ba6b67b085cf4bb234d3
parent06aed7c1776e9db769c77ce836c1995128a6afc6 (diff)
downloadrneovim-ddd257f75301a50c177fc24a693d39a45b47a689.tar.gz
rneovim-ddd257f75301a50c177fc24a693d39a45b47a689.tar.bz2
rneovim-ddd257f75301a50c177fc24a693d39a45b47a689.zip
feat(treesitter): use upstream format for injection queries
-rw-r--r--runtime/doc/news.txt4
-rw-r--r--runtime/doc/treesitter.txt46
-rw-r--r--runtime/lua/vim/treesitter/_meta.lua8
-rw-r--r--runtime/lua/vim/treesitter/languagetree.lua240
-rw-r--r--runtime/lua/vim/treesitter/query.lua21
-rw-r--r--runtime/queries/c/injections.scm6
-rw-r--r--runtime/queries/help/injections.scm7
-rw-r--r--runtime/queries/lua/injections.scm16
-rw-r--r--runtime/queries/vim/injections.scm46
9 files changed, 283 insertions, 111 deletions
diff --git a/runtime/doc/news.txt b/runtime/doc/news.txt
index f5b9f39d93..04ae9360a0 100644
--- a/runtime/doc/news.txt
+++ b/runtime/doc/news.txt
@@ -196,6 +196,10 @@ The following new APIs or features were added.
• Added an omnifunc implementation for lua, |vim.lua_omnifunc()|
+• Treesitter injection queries now use the format described at
+ https://tree-sitter.github.io/tree-sitter/syntax-highlighting#language-injection .
+ Support for the previous format will be removed in a future release.
+
==============================================================================
CHANGED FEATURES *news-changes*
diff --git a/runtime/doc/treesitter.txt b/runtime/doc/treesitter.txt
index 1f78e4d5d9..3b0936941f 100644
--- a/runtime/doc/treesitter.txt
+++ b/runtime/doc/treesitter.txt
@@ -441,7 +441,53 @@ individual query pattern manually by setting its `"priority"` metadata
attribute: >
(super_important_node) @ImportantHighlight (#set! "priority" 105)
+
+==============================================================================
+TREESITTER LANGUAGE INJECTIONS *treesitter-language-injections*
<
+
+Note: the following information is adapted from:
+ https://tree-sitter.github.io/tree-sitter/syntax-highlighting#language-injection
+
+Some source files contain code written in multiple different languages.
+Examples include:
+
+ • HTML files, which can contain JavaScript inside of `<script>` tags and
+ CSS inside of `<style>` tags
+ • ERB files, which contain Ruby inside of `<%` `%>` tags, and HTML outside of
+ those tags
+ • PHP files, which can contain HTML between the `<?php` tags
+ • JavaScript files, which contain regular expression syntax within regex
+ literals
+ • Ruby, which can contain snippets of code inside of heredoc literals,
+ where the heredoc delimiter often indicates the language
+ • Lua, which can contain snippets of Vimscript inside |vim.cmd()| calls.
+ • Vimscript, which can contain snippets of Lua inside |:lua-heredoc|
+ blocks.
+
+All of these examples can be modeled in terms of a parent syntax tree and one
+or more injected syntax trees, which reside inside of certain nodes in the
+parent tree. The language injection query allows you to specify these
+“injections” using the following captures:
+
+ • `@injection.content` - indicates that the captured node should have its
+ contents re-parsed using another language.
+ • `@injection.language` - indicates that the captured node’s text may
+ contain the name of a language that should be used to re-parse the
+ `@injection.content`.
+
+The language injection behavior can also be configured by some properties
+associated with patterns:
+
+ • `injection.language` - can be used to hard-code the name of a specific
+ language.
+ • `injection.combined` - indicates that all of the matching nodes in the
+ tree should have their content parsed as one nested document.
+ • `injection.include-children` - indicates that the `@injection.content`
+ node's entire text should be re-parsed, including the text of its child
+ nodes. By default, child nodes' text will be excluded from the injected
+ document.
+
==============================================================================
VIM.TREESITTER *lua-treesitter*
diff --git a/runtime/lua/vim/treesitter/_meta.lua b/runtime/lua/vim/treesitter/_meta.lua
index 731a5ebf9f..ad0854706b 100644
--- a/runtime/lua/vim/treesitter/_meta.lua
+++ b/runtime/lua/vim/treesitter/_meta.lua
@@ -14,7 +14,7 @@
---@field child_count fun(self: TSNode): integer
---@field named_child_count fun(self: TSNode): integer
---@field child fun(self: TSNode, integer): TSNode
----@field name_child fun(self: TSNode, integer): TSNode
+---@field named_child fun(self: TSNode, integer): TSNode
---@field descendant_for_range fun(self: TSNode, integer, integer, integer, integer): TSNode
---@field named_descendant_for_range fun(self: TSNode, integer, integer, integer, integer): TSNode
---@field parent fun(self: TSNode): TSNode
@@ -43,10 +43,10 @@ function TSNode:_rawquery(query, captures, start, end_) end
function TSNode:_rawquery(query, captures, start, end_) end
---@class TSParser
----@field parse fun(self: TSParser, tree, source: integer|string): TSTree, integer[]
+---@field parse fun(self: TSParser, tree, source: integer|string): TSTree, Range4[]
---@field reset fun(self: TSParser)
----@field included_ranges fun(self: TSParser): integer[]
----@field set_included_ranges fun(self: TSParser, ranges: integer[][])
+---@field included_ranges fun(self: TSParser): Range4[]
+---@field set_included_ranges fun(self: TSParser, ranges: Range6[])
---@field set_timeout fun(self: TSParser, timeout: integer)
---@field timeout fun(self: TSParser): integer
diff --git a/runtime/lua/vim/treesitter/languagetree.lua b/runtime/lua/vim/treesitter/languagetree.lua
index b8b0dd867e..fbc602486b 100644
--- a/runtime/lua/vim/treesitter/languagetree.lua
+++ b/runtime/lua/vim/treesitter/languagetree.lua
@@ -399,6 +399,169 @@ local function get_range_from_metadata(node, id, metadata)
return { node:range() }
end
+---@private
+--- TODO(lewis6991): cleanup of the node_range interface
+---@param node TSNode
+---@param id integer
+---@param metadata TSMetadata
+---@param include_children boolean
+---@return Range4[]
+local function get_node_ranges(node, id, metadata, include_children)
+ local range = get_range_from_metadata(node, id, metadata)
+
+ if include_children then
+ return { range }
+ end
+
+ local ranges = {} ---@type Range4[]
+
+ local srow, scol, erow, ecol = range[1], range[2], range[3], range[4]
+
+ for i = 0, node:named_child_count() - 1 do
+ local child = node:named_child(i)
+ local child_srow, child_scol, child_erow, child_ecol = child:range()
+ if child_srow > srow or child_scol > scol then
+ table.insert(ranges, { srow, scol, child_srow, child_scol })
+ end
+ srow = child_erow
+ scol = child_ecol
+ end
+
+ if erow > srow or ecol > scol then
+ table.insert(ranges, { srow, scol, erow, ecol })
+ end
+
+ return ranges
+end
+
+---@alias TSInjection table<string,table<integer,table>>
+
+---@private
+---@param t table<integer,TSInjection>
+---@param tree_index integer
+---@param pattern integer
+---@param lang string
+---@param combined boolean
+---@param ranges Range4[]
+local function add_injection(t, tree_index, pattern, lang, combined, ranges)
+ assert(type(lang) == 'string')
+
+ -- Each tree index should be isolated from the other nodes.
+ if not t[tree_index] then
+ t[tree_index] = {}
+ end
+
+ if not t[tree_index][lang] then
+ t[tree_index][lang] = {}
+ end
+
+ -- Key this by pattern. If combined is set to true all captures of this pattern
+ -- will be parsed by treesitter as the same "source".
+ -- If combined is false, each "region" will be parsed as a single source.
+ if not t[tree_index][lang][pattern] then
+ t[tree_index][lang][pattern] = { combined = combined, regions = {} }
+ end
+
+ table.insert(t[tree_index][lang][pattern].regions, ranges)
+end
+
+---@private
+---Get node text
+---
+---Note: `query.get_node_text` returns string|string[]|nil, so this thin wrapper
+---exists only to annotate that the result is a string.
+---
+---TODO(lewis6991): use [at]overload annotations on `query.get_node_text`
+---@param node TSNode
+---@param source integer|string
+---@param metadata table
+---@return string
+local function get_node_text(node, source, metadata)
+ return query.get_node_text(node, source, { metadata = metadata }) --[[@as string]]
+end
+
+---@private
+--- Extract injections according to:
+--- https://tree-sitter.github.io/tree-sitter/syntax-highlighting#language-injection
+---@param match table<integer,TSNode>
+---@param metadata table
+---@return string, boolean, Range4[]
+function LanguageTree:_get_injection(match, metadata)
+ local ranges = {} ---@type Range4[]
+ local combined = metadata['injection.combined'] ~= nil
+ local lang = metadata['injection.language'] ---@type string
+ local include_children = metadata['injection.include-children'] ~= nil
+
+ for id, node in pairs(match) do
+ local name = self._injection_query.captures[id]
+
+ -- Lang should override any other language tag
+ if name == 'injection.language' then
+ lang = get_node_text(node, self._source, metadata[id])
+ elseif name == 'injection.content' then
+ ranges = get_node_ranges(node, id, metadata, include_children)
+ end
+ end
+
+ return lang, combined, ranges
+end
+
+---@private
+---@param match table<integer,TSNode>
+---@param metadata table
+---@return string, boolean, Range4[]
+function LanguageTree:_get_injection_deprecated(match, metadata)
+ local lang = nil ---@type string
+ local ranges = {} ---@type Range4[]
+ local combined = metadata.combined ~= nil
+
+ -- Directives can configure how injections are captured as well as actual node captures.
+ -- This allows more advanced processing for determining ranges and language resolution.
+ if metadata.content then
+ local content = metadata.content ---@type any
+
+ -- Allow for captured nodes to be used
+ if type(content) == 'number' then
+ content = { match[content]:range() }
+ end
+
+ if type(content) == 'table' and #content >= 4 then
+ vim.list_extend(ranges, content)
+ end
+ end
+
+ if metadata.language then
+ lang = metadata.language ---@type string
+ end
+
+ -- You can specify the content and language together
+ -- using a tag with the language, for example
+ -- @javascript
+ for id, node in pairs(match) do
+ local name = self._injection_query.captures[id]
+
+ -- Lang should override any other language tag
+ if name == 'language' and not lang then
+ lang = get_node_text(node, self._source, metadata[id])
+ elseif name == 'combined' then
+ combined = true
+ elseif name == 'content' and #ranges == 0 then
+ table.insert(ranges, get_range_from_metadata(node, id, metadata))
+ -- Ignore any tags that start with "_"
+ -- Allows for other tags to be used in matches
+ elseif string.sub(name, 1, 1) ~= '_' then
+ if not lang then
+ lang = name
+ end
+
+ if #ranges == 0 then
+ table.insert(ranges, get_range_from_metadata(node, id, metadata))
+ end
+ end
+ end
+
+ return lang, combined, ranges
+end
+
--- Gets language injection points by language.
---
--- This is where most of the injection processing occurs.
@@ -406,13 +569,13 @@ end
--- TODO: Allow for an offset predicate to tailor the injection range
--- instead of using the entire node's range.
---@private
----@return table<string, integer[][]>
+---@return table<string, Range4[][]>
function LanguageTree:_get_injections()
if not self._injection_query then
return {}
end
- ---@type table<integer,table<string,table<integer,table>>>
+ ---@type table<integer,TSInjection>
local injections = {}
for tree_index, tree in ipairs(self._trees) do
@@ -422,75 +585,12 @@ function LanguageTree:_get_injections()
for pattern, match, metadata in
self._injection_query:iter_matches(root_node, self._source, start_line, end_line + 1)
do
- local lang = nil ---@type string
- local ranges = {} ---@type Range4[]
- local combined = metadata.combined ---@type boolean
-
- -- Directives can configure how injections are captured as well as actual node captures.
- -- This allows more advanced processing for determining ranges and language resolution.
- if metadata.content then
- local content = metadata.content ---@type any
-
- -- Allow for captured nodes to be used
- if type(content) == 'number' then
- content = { match[content]:range() }
- end
-
- if type(content) == 'table' and #content >= 4 then
- vim.list_extend(ranges, content)
- end
- end
-
- if metadata.language then
- lang = metadata.language ---@type string
- end
-
- -- You can specify the content and language together
- -- using a tag with the language, for example
- -- @javascript
- for id, node in pairs(match) do
- local name = self._injection_query.captures[id]
-
- -- Lang should override any other language tag
- if name == 'language' and not lang then
- ---@diagnostic disable-next-line
- lang = query.get_node_text(node, self._source, { metadata = metadata[id] })
- elseif name == 'combined' then
- combined = true
- elseif name == 'content' and #ranges == 0 then
- table.insert(ranges, get_range_from_metadata(node, id, metadata))
- -- Ignore any tags that start with "_"
- -- Allows for other tags to be used in matches
- elseif string.sub(name, 1, 1) ~= '_' then
- if not lang then
- lang = name
- end
-
- if #ranges == 0 then
- table.insert(ranges, get_range_from_metadata(node, id, metadata))
- end
- end
+ local lang, combined, ranges = self:_get_injection(match, metadata)
+ if not lang then
+ -- TODO(lewis6991): remove after 0.9 (#20434)
+ lang, combined, ranges = self:_get_injection_deprecated(match, metadata)
end
-
- assert(type(lang) == 'string')
-
- -- Each tree index should be isolated from the other nodes.
- if not injections[tree_index] then
- injections[tree_index] = {}
- end
-
- if not injections[tree_index][lang] then
- injections[tree_index][lang] = {}
- end
-
- -- Key this by pattern. If combined is set to true all captures of this pattern
- -- will be parsed by treesitter as the same "source".
- -- If combined is false, each "region" will be parsed as a single source.
- if not injections[tree_index][lang][pattern] then
- injections[tree_index][lang][pattern] = { combined = combined, regions = {} }
- end
-
- table.insert(injections[tree_index][lang][pattern].regions, ranges)
+ add_injection(injections, tree_index, pattern, lang, combined, ranges)
end
end
diff --git a/runtime/lua/vim/treesitter/query.lua b/runtime/lua/vim/treesitter/query.lua
index 22f706585e..59894cc7f5 100644
--- a/runtime/lua/vim/treesitter/query.lua
+++ b/runtime/lua/vim/treesitter/query.lua
@@ -407,7 +407,7 @@ predicate_handlers['vim-match?'] = predicate_handlers['match?']
---@field [string] integer|string
---@field range Range4
----@alias TSDirective fun(match: TSMatch, _, _, predicate: any[], metadata: TSMetadata)
+---@alias TSDirective fun(match: TSMatch, _, _, predicate: (string|integer)[], metadata: TSMetadata)
-- Predicate handler receive the following arguments
-- (match, pattern, bufnr, predicate)
@@ -419,24 +419,17 @@ predicate_handlers['vim-match?'] = predicate_handlers['match?']
---@type table<string,TSDirective>
local directive_handlers = {
['set!'] = function(_, _, _, pred, metadata)
- if #pred == 4 then
- -- (#set! @capture "key" "value")
- ---@diagnostic disable-next-line:no-unknown
- local _, capture_id, key, value = unpack(pred)
- ---@cast value integer|string
- ---@cast capture_id integer
- ---@cast key string
+ if #pred >= 3 and type(pred[2]) == 'number' then
+ -- (#set! @capture key value)
+ local capture_id, key, value = pred[2], pred[3], pred[4]
if not metadata[capture_id] then
metadata[capture_id] = {}
end
metadata[capture_id][key] = value
else
- ---@diagnostic disable-next-line:no-unknown
- local _, key, value = unpack(pred)
- ---@cast value integer|string
- ---@cast key string
- -- (#set! "key" "value")
- metadata[key] = value
+ -- (#set! key value)
+ local key, value = pred[2], pred[3]
+ metadata[key] = value or true
end
end,
-- Shifts the range of a node.
diff --git a/runtime/queries/c/injections.scm b/runtime/queries/c/injections.scm
index 7e9e73449d..84ae33683e 100644
--- a/runtime/queries/c/injections.scm
+++ b/runtime/queries/c/injections.scm
@@ -1,3 +1,5 @@
-(preproc_arg) @c
+((preproc_arg) @injection.content
+ (#set! injection.language "c"))
-; (comment) @comment
+; ((comment) @injection.content
+; (#set! injection.language "comment"))
diff --git a/runtime/queries/help/injections.scm b/runtime/queries/help/injections.scm
index 09bbe44e84..260a05d863 100644
--- a/runtime/queries/help/injections.scm
+++ b/runtime/queries/help/injections.scm
@@ -1,3 +1,4 @@
-(codeblock
- (language) @language
- (code) @content)
+((codeblock
+ (language) @injection.language
+ (code) @injection.content)
+ (#set! injection.include-children))
diff --git a/runtime/queries/lua/injections.scm b/runtime/queries/lua/injections.scm
index 69acbbbe9f..10eb4c4054 100644
--- a/runtime/queries/lua/injections.scm
+++ b/runtime/queries/lua/injections.scm
@@ -3,20 +3,26 @@
(identifier) @_cdef_identifier
(_ _ (identifier) @_cdef_identifier)
]
- arguments: (arguments (string content: _ @c)))
+ arguments: (arguments (string content: _ @injection.content)))
+ (#set! injection.language "c")
(#eq? @_cdef_identifier "cdef"))
((function_call
name: (_) @_vimcmd_identifier
- arguments: (arguments (string content: _ @vim)))
+ arguments: (arguments (string content: _ @injection.content)))
+ (#set! injection.language "vim")
(#any-of? @_vimcmd_identifier "vim.cmd" "vim.api.nvim_command" "vim.api.nvim_exec" "vim.api.nvim_cmd"))
((function_call
name: (_) @_vimcmd_identifier
- arguments: (arguments (string content: _ @query) .))
+ arguments: (arguments (string content: _ @injection.content) .))
+ (#set! injection.language "query")
(#eq? @_vimcmd_identifier "vim.treesitter.query.set_query"))
; ;; highlight string as query if starts with `;; query`
-; ((string ("string_content") @query) (#lua-match? @query "^%s*;+%s?query"))
+; ((string ("string_content") @injection.content)
+; (#set! injection.language "query")
+; (#lua-match? @injection.content "^%s*;+%s?query"))
-; (comment) @comment
+; ((comment) @injection.content
+; (#set! injection.language "comment"))
diff --git a/runtime/queries/vim/injections.scm b/runtime/queries/vim/injections.scm
index b53643dc1a..50f0190112 100644
--- a/runtime/queries/vim/injections.scm
+++ b/runtime/queries/vim/injections.scm
@@ -1,18 +1,33 @@
-(lua_statement (script (body) @lua))
-(lua_statement (chunk) @lua)
-(ruby_statement (script (body) @ruby))
-(ruby_statement (chunk) @ruby)
-(python_statement (script (body) @python))
-(python_statement (chunk) @python)
+((lua_statement (script (body) @injection.content))
+ (#set! injection.language "lua"))
+
+((lua_statement (chunk) @injection.content)
+ (#set! injection.language "lua"))
+
+((ruby_statement (script (body) @injection.content))
+ (#set! injection.language "ruby"))
+
+((ruby_statement (chunk) @injection.content)
+ (#set! injection.language "ruby"))
+
+((python_statement (script (body) @injection.content))
+ (#set! injection.language "python"))
+
+((python_statement (chunk) @injection.content)
+ (#set! injection.language "python"))
+
;; If we support perl at some point...
-;; (perl_statement (script (body) @perl))
-;; (perl_statement (chunk) @perl)
+;; ((perl_statement (script (body) @injection.content))
+;; (#set! injection.language "perl"))
+;; ((perl_statement (chunk) @injection.content)
+;; (#set! injection.language "perl"))
-(autocmd_statement (pattern) @regex)
+((autocmd_statement (pattern) @injection.content)
+ (#set! injection.language "regex"))
((set_item
option: (option_name) @_option
- value: (set_value) @vim)
+ value: (set_value) @injection.content)
(#any-of? @_option
"includeexpr" "inex"
"printexpr" "pexpr"
@@ -22,7 +37,12 @@
"foldexpr" "fde"
"diffexpr" "dex"
"patchexpr" "pex"
- "charconvert" "ccv"))
+ "charconvert" "ccv")
+ (#set! injection.language "vim"))
+
+
+; ((comment) @injection.content
+; (#set! injection.language "comment"))
-; (comment) @comment
-; (line_continuation_comment) @comment
+; ((line_continuation_comment) @injection.content
+; (#set! injection.language "comment"))