aboutsummaryrefslogtreecommitdiff
path: root/runtime/lua/vim/treesitter/languagetree.lua
diff options
context:
space:
mode:
authorJosh Rahm <joshuarahm@gmail.com>2025-02-05 23:09:29 +0000
committerJosh Rahm <joshuarahm@gmail.com>2025-02-05 23:09:29 +0000
commitd5f194ce780c95821a855aca3c19426576d28ae0 (patch)
treed45f461b19f9118ad2bb1f440a7a08973ad18832 /runtime/lua/vim/treesitter/languagetree.lua
parentc5d770d311841ea5230426cc4c868e8db27300a8 (diff)
parent44740e561fc93afe3ebecfd3618bda2d2abeafb0 (diff)
downloadrneovim-rahm.tar.gz
rneovim-rahm.tar.bz2
rneovim-rahm.zip
Merge remote-tracking branch 'upstream/master' into mix_20240309HEADrahm
Diffstat (limited to 'runtime/lua/vim/treesitter/languagetree.lua')
-rw-r--r--runtime/lua/vim/treesitter/languagetree.lua356
1 files changed, 261 insertions, 95 deletions
diff --git a/runtime/lua/vim/treesitter/languagetree.lua b/runtime/lua/vim/treesitter/languagetree.lua
index 4b42164dc8..ea745c4deb 100644
--- a/runtime/lua/vim/treesitter/languagetree.lua
+++ b/runtime/lua/vim/treesitter/languagetree.lua
@@ -44,6 +44,8 @@ local query = require('vim.treesitter.query')
local language = require('vim.treesitter.language')
local Range = require('vim.treesitter._range')
+local default_parse_timeout_ms = 3
+
---@alias TSCallbackName
---| 'changedtree'
---| 'bytes'
@@ -58,6 +60,8 @@ local Range = require('vim.treesitter._range')
---| 'on_child_added'
---| 'on_child_removed'
+---@alias ParserThreadState { timeout: integer? }
+
--- @type table<TSCallbackNameOn,TSCallbackName>
local TSCallbackNames = {
on_changedtree = 'changedtree',
@@ -76,8 +80,13 @@ local TSCallbackNames = {
---@field private _injections_processed boolean
---@field private _opts table Options
---@field private _parser TSParser Parser for language
----@field private _has_regions boolean
+---Table of regions for which the tree is currently running an async parse
+---@field private _ranges_being_parsed table<string, boolean>
+---Table of callback queues, keyed by each region for which the callbacks should be run
+---@field private _cb_queues table<string, fun(err?: string, trees?: table<integer, TSTree>)[]>
---@field private _regions table<integer, Range6[]>?
+---The total number of regions. Since _regions can have holes, we cannot simply read this value from #_regions.
+---@field private _num_regions integer
---List of regions this tree should manage and parse. If nil then regions are
---taken from _trees. This is mostly a short-lived cache for included_regions()
---@field private _lang string Language name
@@ -85,7 +94,8 @@ local TSCallbackNames = {
---@field private _source (integer|string) Buffer or string to parse
---@field private _trees table<integer, TSTree> Reference to parsed tree (one for each language).
---Each key is the index of region, which is synced with _regions and _valid.
----@field private _valid boolean|table<integer,boolean> If the parsed tree is valid
+---@field private _valid_regions table<integer,true> Set of valid region IDs.
+---@field private _is_entirely_valid boolean Whether the entire tree (excluding children) is valid.
---@field private _logger? fun(logtype: string, msg: string)
---@field private _logfile? file*
local LanguageTree = {}
@@ -117,7 +127,7 @@ function LanguageTree.new(source, lang, opts)
local injections = opts.injections or {}
- --- @type vim.treesitter.LanguageTree
+ --- @class vim.treesitter.LanguageTree
local self = {
_source = source,
_lang = lang,
@@ -126,10 +136,13 @@ function LanguageTree.new(source, lang, opts)
_opts = opts,
_injection_query = injections[lang] and query.parse(lang, injections[lang])
or query.get(lang, 'injections'),
- _has_regions = false,
_injections_processed = false,
- _valid = false,
+ _valid_regions = {},
+ _num_regions = 1,
+ _is_entirely_valid = false,
_parser = vim._create_ts_parser(lang),
+ _ranges_being_parsed = {},
+ _cb_queues = {},
_callbacks = {},
_callbacks_rec = {},
}
@@ -182,7 +195,7 @@ end
---Measure execution time of a function
---@generic R1, R2, R3
----@param f fun(): R1, R2, R2
+---@param f fun(): R1, R2, R3
---@return number, R1, R2, R3
local function tcall(f, ...)
local start = vim.uv.hrtime()
@@ -190,6 +203,7 @@ local function tcall(f, ...)
local r = { f(...) }
--- @type number
local duration = (vim.uv.hrtime() - start) / 1000000
+ --- @diagnostic disable-next-line: redundant-return-value
return duration, unpack(r)
end
@@ -231,7 +245,9 @@ end
--- tree in treesitter. Doesn't clear filesystem cache. Called often, so needs to be fast.
---@param reload boolean|nil
function LanguageTree:invalidate(reload)
- self._valid = false
+ self._valid_regions = {}
+ self._is_entirely_valid = false
+ self._parser:reset()
-- buffer was reloaded, reparse all trees
if reload then
@@ -258,20 +274,51 @@ function LanguageTree:trees()
end
--- Gets the language of this tree node.
+--- @return string
function LanguageTree:lang()
return self._lang
end
+--- @param region Range6[]
+--- @param range? boolean|Range
+--- @return boolean
+local function intercepts_region(region, range)
+ if #region == 0 then
+ return true
+ end
+
+ if range == nil then
+ return false
+ end
+
+ if type(range) == 'boolean' then
+ return range
+ end
+
+ for _, r in ipairs(region) do
+ if Range.intercepts(r, range) then
+ return true
+ end
+ end
+
+ return false
+end
+
--- Returns whether this LanguageTree is valid, i.e., |LanguageTree:trees()| reflects the latest
--- state of the source. If invalid, user should call |LanguageTree:parse()|.
----@param exclude_children boolean|nil whether to ignore the validity of children (default `false`)
+---@param exclude_children boolean? whether to ignore the validity of children (default `false`)
+---@param range Range? range to check for validity
---@return boolean
-function LanguageTree:is_valid(exclude_children)
- local valid = self._valid
+function LanguageTree:is_valid(exclude_children, range)
+ local valid_regions = self._valid_regions
- if type(valid) == 'table' then
- for i, _ in pairs(self:included_regions()) do
- if not valid[i] then
+ if not self._is_entirely_valid then
+ if not range then
+ return false
+ end
+ -- TODO: Efficiently search for possibly intersecting regions using a binary search
+ for i, region in pairs(self:included_regions()) do
+ if not valid_regions[i] and intercepts_region(region, range) then
return false
end
end
@@ -283,97 +330,81 @@ function LanguageTree:is_valid(exclude_children)
end
for _, child in pairs(self._children) do
- if not child:is_valid(exclude_children) then
+ if not child:is_valid(exclude_children, range) then
return false
end
end
end
- if type(valid) == 'boolean' then
- return valid
- end
-
- self._valid = true
return true
end
--- Returns a map of language to child tree.
+--- @return table<string,vim.treesitter.LanguageTree>
function LanguageTree:children()
return self._children
end
--- Returns the source content of the language tree (bufnr or string).
+--- @return integer|string
function LanguageTree:source()
return self._source
end
---- @param region Range6[]
---- @param range? boolean|Range
---- @return boolean
-local function intercepts_region(region, range)
- if #region == 0 then
- return true
- end
-
- if range == nil then
- return false
- end
-
- if type(range) == 'boolean' then
- return range
- end
-
- for _, r in ipairs(region) do
- if Range.intercepts(r, range) then
- return true
- end
- end
-
- return false
-end
-
--- @private
--- @param range boolean|Range?
+--- @param thread_state ParserThreadState
--- @return Range6[] changes
--- @return integer no_regions_parsed
--- @return number total_parse_time
-function LanguageTree:_parse_regions(range)
+--- @return boolean finished whether async parsing still needs time
+function LanguageTree:_parse_regions(range, thread_state)
local changes = {}
local no_regions_parsed = 0
local total_parse_time = 0
- if type(self._valid) ~= 'table' then
- self._valid = {}
- end
-
-- If there are no ranges, set to an empty list
-- so the included ranges in the parser are cleared.
for i, ranges in pairs(self:included_regions()) do
if
- not self._valid[i]
+ not self._valid_regions[i]
and (
intercepts_region(ranges, range)
or (self._trees[i] and intercepts_region(self._trees[i]:included_ranges(false), range))
)
then
self._parser:set_included_ranges(ranges)
+ self._parser:set_timeout(thread_state.timeout and thread_state.timeout * 1000 or 0) -- ms -> micros
+
local parse_time, tree, tree_changes =
tcall(self._parser.parse, self._parser, self._trees[i], self._source, true)
+ while true do
+ if tree then
+ break
+ end
+ coroutine.yield(changes, no_regions_parsed, total_parse_time, false)
- -- Pass ranges if this is an initial parse
- local cb_changes = self._trees[i] and tree_changes or tree:included_ranges(true)
+ parse_time, tree, tree_changes =
+ tcall(self._parser.parse, self._parser, self._trees[i], self._source, true)
+ end
- self:_do_callback('changedtree', cb_changes, tree)
+ self:_do_callback('changedtree', tree_changes, tree)
self._trees[i] = tree
vim.list_extend(changes, tree_changes)
total_parse_time = total_parse_time + parse_time
no_regions_parsed = no_regions_parsed + 1
- self._valid[i] = true
+ self._valid_regions[i] = true
+
+ -- _valid_regions can have holes, but that is okay because this equality is only true when it
+ -- has no holes (meaning all regions are valid)
+ if #self._valid_regions == self._num_regions then
+ self._is_entirely_valid = true
+ end
end
end
- return changes, no_regions_parsed, total_parse_time
+ return changes, no_regions_parsed, total_parse_time, true
end
--- @private
@@ -409,6 +440,98 @@ function LanguageTree:_add_injections()
return query_time
end
+--- @param range boolean|Range?
+--- @return string
+local function range_to_string(range)
+ return type(range) == 'table' and table.concat(range, ',') or tostring(range)
+end
+
+--- @private
+--- @param range boolean|Range?
+--- @param callback fun(err?: string, trees?: table<integer, TSTree>)
+function LanguageTree:_push_async_callback(range, callback)
+ local key = range_to_string(range)
+ self._cb_queues[key] = self._cb_queues[key] or {}
+ local queue = self._cb_queues[key]
+ queue[#queue + 1] = callback
+end
+
+--- @private
+--- @param range boolean|Range?
+--- @param err? string
+--- @param trees? table<integer, TSTree>
+function LanguageTree:_run_async_callbacks(range, err, trees)
+ local key = range_to_string(range)
+ for _, cb in ipairs(self._cb_queues[key]) do
+ cb(err, trees)
+ end
+ self._ranges_being_parsed[key] = nil
+ self._cb_queues[key] = nil
+end
+
+--- Run an asynchronous parse, calling {on_parse} when complete.
+---
+--- @private
+--- @param range boolean|Range?
+--- @param on_parse fun(err?: string, trees?: table<integer, TSTree>)
+--- @return table<integer, TSTree>? trees the list of parsed trees, if parsing completed synchronously
+function LanguageTree:_async_parse(range, on_parse)
+ self:_push_async_callback(range, on_parse)
+
+ -- If we are already running an async parse, just queue the callback.
+ local range_string = range_to_string(range)
+ if not self._ranges_being_parsed[range_string] then
+ self._ranges_being_parsed[range_string] = true
+ else
+ return
+ end
+
+ local source = self._source
+ local is_buffer_parser = type(source) == 'number'
+ local buf = is_buffer_parser and vim.b[source] or nil
+ local ct = is_buffer_parser and buf.changedtick or nil
+ local total_parse_time = 0
+ local redrawtime = vim.o.redrawtime
+
+ local thread_state = {} ---@type ParserThreadState
+
+ ---@type fun(): table<integer, TSTree>, boolean
+ local parse = coroutine.wrap(self._parse)
+
+ local function step()
+ if is_buffer_parser then
+ if
+ not vim.api.nvim_buf_is_valid(source --[[@as number]])
+ then
+ return nil
+ end
+
+ -- If buffer was changed in the middle of parsing, reset parse state
+ if buf.changedtick ~= ct then
+ ct = buf.changedtick
+ total_parse_time = 0
+ parse = coroutine.wrap(self._parse)
+ end
+ end
+
+ thread_state.timeout = not vim.g._ts_force_sync_parsing and default_parse_timeout_ms or nil
+ local parse_time, trees, finished = tcall(parse, self, range, thread_state)
+ total_parse_time = total_parse_time + parse_time
+
+ if finished then
+ self:_run_async_callbacks(range, nil, trees)
+ return trees
+ elseif total_parse_time > redrawtime then
+ self:_run_async_callbacks(range, 'TIMEOUT', nil)
+ return nil
+ else
+ vim.schedule(step)
+ end
+ end
+
+ return step()
+end
+
--- Recursively parse all regions in the language tree using |treesitter-parsers|
--- for the corresponding languages and run injection queries on the parsed trees
--- to determine whether child trees should be created and parsed.
@@ -420,11 +543,33 @@ end
--- Set to `true` to run a complete parse of the source (Note: Can be slow!)
--- Set to `false|nil` to only parse regions with empty ranges (typically
--- only the root tree without injections).
---- @return table<integer, TSTree>
-function LanguageTree:parse(range)
- if self:is_valid() then
+--- @param on_parse fun(err?: string, trees?: table<integer, TSTree>)? Function invoked when parsing completes.
+--- When provided and `vim.g._ts_force_sync_parsing` is not set, parsing will run
+--- asynchronously. The first argument to the function is a string representing the error type,
+--- in case of a failure (currently only possible for timeouts). The second argument is the list
+--- of trees returned by the parse (upon success), or `nil` if the parse timed out (determined
+--- by 'redrawtime').
+---
+--- If parsing was still able to finish synchronously (within 3ms), `parse()` returns the list
+--- of trees. Otherwise, it returns `nil`.
+--- @return table<integer, TSTree>?
+function LanguageTree:parse(range, on_parse)
+ if on_parse then
+ return self:_async_parse(range, on_parse)
+ end
+ local trees, _ = self:_parse(range, {})
+ return trees
+end
+
+--- @private
+--- @param range boolean|Range|nil
+--- @param thread_state ParserThreadState
+--- @return table<integer, TSTree> trees
+--- @return boolean finished
+function LanguageTree:_parse(range, thread_state)
+ if self:is_valid(nil, type(range) == 'table' and range or nil) then
self:_log('valid')
- return self._trees
+ return self._trees, true
end
local changes --- @type Range6[]?
@@ -435,15 +580,27 @@ function LanguageTree:parse(range)
local total_parse_time = 0
-- At least 1 region is invalid
- if not self:is_valid(true) then
- changes, no_regions_parsed, total_parse_time = self:_parse_regions(range)
+ if not self:is_valid(true, type(range) == 'table' and range or nil) then
+ ---@type fun(self: vim.treesitter.LanguageTree, range: boolean|Range?, thread_state: ParserThreadState): Range6[], integer, number, boolean
+ local parse_regions = coroutine.wrap(self._parse_regions)
+ while true do
+ local is_finished
+ changes, no_regions_parsed, total_parse_time, is_finished =
+ parse_regions(self, range, thread_state)
+ thread_state.timeout = thread_state.timeout
+ and math.max(thread_state.timeout - total_parse_time, 0)
+ if is_finished then
+ break
+ end
+ coroutine.yield(self._trees, false)
+ end
-- Need to run injections when we parsed something
if no_regions_parsed > 0 then
self._injections_processed = false
end
end
- if not self._injections_processed and range ~= false and range ~= nil then
+ if not self._injections_processed and range then
query_time = self:_add_injections()
self._injections_processed = true
end
@@ -457,10 +614,24 @@ function LanguageTree:parse(range)
})
for _, child in pairs(self._children) do
- child:parse(range)
+ if thread_state.timeout == 0 then
+ coroutine.yield(self._trees, false)
+ end
+
+ ---@type fun(): table<integer, TSTree>, boolean
+ local parse = coroutine.wrap(child._parse)
+
+ while true do
+ local ctime, _, child_finished = tcall(parse, child, range, thread_state)
+ if child_finished then
+ thread_state.timeout = thread_state.timeout and math.max(thread_state.timeout - ctime, 0)
+ break
+ end
+ coroutine.yield(self._trees, child_finished)
+ end
end
- return self._trees
+ return self._trees, true
end
--- Invokes the callback for each |LanguageTree| recursively.
@@ -504,7 +675,8 @@ function LanguageTree:add_child(lang)
return self._children[lang]
end
---- @package
+---Returns the parent tree. `nil` for the root tree.
+---@return vim.treesitter.LanguageTree?
function LanguageTree:parent()
return self._parent
end
@@ -551,38 +723,34 @@ end
---region is valid or not.
---@param fn fun(index: integer, region: Range6[]): boolean
function LanguageTree:_iter_regions(fn)
- if not self._valid then
+ if vim.deep_equal(self._valid_regions, {}) then
return
end
- local was_valid = type(self._valid) ~= 'table'
-
- if was_valid then
- self:_log('was valid', self._valid)
- self._valid = {}
+ if self._is_entirely_valid then
+ self:_log('was valid')
end
local all_valid = true
for i, region in pairs(self:included_regions()) do
- if was_valid or self._valid[i] then
- self._valid[i] = fn(i, region)
- if not self._valid[i] then
+ if self._valid_regions[i] then
+ -- Setting this to nil rather than false allows us to determine if all regions were parsed
+ -- just by checking the length of _valid_regions.
+ self._valid_regions[i] = fn(i, region) and true or nil
+ if not self._valid_regions[i] then
self:_log(function()
return 'invalidating region', i, region_tostr(region)
end)
end
end
- if not self._valid[i] then
+ if not self._valid_regions[i] then
all_valid = false
end
end
- -- Compress the valid value to 'true' if there are no invalid regions
- if all_valid then
- self._valid = all_valid
- end
+ self._is_entirely_valid = all_valid
end
--- Sets the included regions that should be parsed by this |LanguageTree|.
@@ -602,14 +770,13 @@ end
---@private
---@param new_regions (Range4|Range6|TSNode)[][] List of regions this tree should manage and parse.
function LanguageTree:set_included_regions(new_regions)
- self._has_regions = true
-
-- Transform the tables from 4 element long to 6 element long (with byte offset)
for _, region in ipairs(new_regions) do
for i, range in ipairs(region) do
if type(range) == 'table' and #range == 4 then
region[i] = Range.add_bytes(self._source, range --[[@as Range4]])
elseif type(range) == 'userdata' then
+ --- @diagnostic disable-next-line: missing-fields LuaLS varargs bug
region[i] = { range:range(true) }
end
end
@@ -633,6 +800,7 @@ function LanguageTree:set_included_regions(new_regions)
end
self._regions = new_regions
+ self._num_regions = #new_regions
end
---Gets the set of included regions managed by this LanguageTree. This can be different from the
@@ -646,18 +814,8 @@ function LanguageTree:included_regions()
return self._regions
end
- if not self._has_regions then
- -- treesitter.c will default empty ranges to { -1, -1, -1, -1, -1, -1} (the full range)
- return { {} }
- end
-
- local regions = {} ---@type Range6[][]
- for i, _ in pairs(self._trees) do
- regions[i] = self._trees[i]:included_ranges(true)
- end
-
- self._regions = regions
- return regions
+ -- treesitter.c will default empty ranges to { -1, -1, -1, -1, -1, -1} (the full range)
+ return { {} }
end
---@param node TSNode
@@ -821,7 +979,7 @@ end
--- @private
--- @return table<string, Range6[][]>
function LanguageTree:_get_injections()
- if not self._injection_query then
+ if not self._injection_query or #self._injection_query.captures == 0 then
return {}
end
@@ -907,7 +1065,15 @@ function LanguageTree:_edit(
)
end
- self._regions = nil
+ self._parser:reset()
+
+ if self._regions then
+ local regions = {} ---@type table<integer, Range6[]>
+ for i, tree in pairs(self._trees) do
+ regions[i] = tree:included_ranges(true)
+ end
+ self._regions = regions
+ end
local changed_range = {
start_row,