From 613068071e02ddf5958fa82974373c370112c5e6 Mon Sep 17 00:00:00 2001 From: Thomas Vigouroux Date: Tue, 14 Jul 2020 21:50:57 +0200 Subject: treesitter: refactor and use lua regexes --- runtime/lua/vim/treesitter.lua | 197 ++++++----------------------- runtime/lua/vim/treesitter/highlighter.lua | 160 +++++++++++++++++++++++ runtime/lua/vim/treesitter/language.lua | 26 ++++ runtime/lua/vim/treesitter/query.lua | 133 +++++++++++++++++++ runtime/lua/vim/tshighlighter.lua | 116 ----------------- 5 files changed, 357 insertions(+), 275 deletions(-) create mode 100644 runtime/lua/vim/treesitter/highlighter.lua create mode 100644 runtime/lua/vim/treesitter/language.lua create mode 100644 runtime/lua/vim/treesitter/query.lua delete mode 100644 runtime/lua/vim/tshighlighter.lua (limited to 'runtime/lua') diff --git a/runtime/lua/vim/treesitter.lua b/runtime/lua/vim/treesitter.lua index 927456708c..f43c8a872d 100644 --- a/runtime/lua/vim/treesitter.lua +++ b/runtime/lua/vim/treesitter.lua @@ -1,4 +1,6 @@ local a = vim.api +local query = require'vim.treesitter.query' +local language = require'vim.treesitter.language' -- TODO(bfredl): currently we retain parsers for the lifetime of the buffer. -- Consider use weak references to release parser if all plugins are done with @@ -44,42 +46,30 @@ function Parser:set_included_ranges(ranges) self.valid = false end -local M = { - parse_query = vim._ts_parse_query, -} +local M = vim.tbl_extend("error", query, language) setmetatable(M, { __index = function (t, k) if k == "TSHighlighter" then - t[k] = require'vim.tshighlighter' + a.nvim_err_writeln("vim.TSHighlighter is deprecated, please use vim.treesitter.highlighter") + t[k] = require'vim.treesitter.highlighter' + return t[k] + elseif k == "highlighter" then + t[k] = require'vim.treesitter.highlighter' return t[k] end end }) -function M.require_language(lang, path) - if vim._ts_has_language(lang) then - return true - end - if path == nil then - local fname = 'parser/' .. lang .. 
'.*' - local paths = a.nvim_get_runtime_file(fname, false) - if #paths == 0 then - -- TODO(bfredl): help tag? - error("no parser for '"..lang.."' language") - end - path = paths[1] - end - vim._ts_add_language(path, lang) -end - -function M.inspect_language(lang) - M.require_language(lang) - return vim._ts_inspect_language(lang) -end - -function M.create_parser(bufnr, lang, id) - M.require_language(lang) +--- Creates a new parser. +-- +-- It is not recommended to use this, use vim.treesitter.get_parser() instead. +-- +-- @param bufnr The buffer the parser will be tied to +-- @param lang The language of the parser. +-- @param id The id the parser will have +function M._create_parser(bufnr, lang, id) + language.require_language(lang) if bufnr == 0 then bufnr = a.nvim_get_current_buf() end @@ -91,8 +81,8 @@ function M.create_parser(bufnr, lang, id) self.changedtree_cbs = {} self.lines_cbs = {} self:parse() - -- TODO(bfredl): use weakref to self, so that the parser is free'd is no plugin is - -- using it. + -- TODO(bfredl): use weakref to self, so that the parser is free'd if no plugin is + -- using it. local function lines_cb(_, ...) return self:_on_lines(...) end @@ -108,17 +98,31 @@ function M.create_parser(bufnr, lang, id) return self end -function M.get_parser(bufnr, ft, buf_attach_cbs) +--- Gets the parser for this bufnr / lang combination. +-- +-- If needed this will create the parser. +-- Unconditionally attaches the provided callbacks. +-- +-- @param bufnr The buffer the parser should be tied to +-- @param lang The language of this parser (defaults to the buffer's 'filetype' when nil) +-- @param buf_attach_cbs An `nvim_buf_attach`-like table argument with the following keys : +-- `on_lines` : see `nvim_buf_attach`, but this will be called _after_ the parsers callback. +-- `on_changedtree` : a callback that will be called every time the tree has syntactical changes. +-- it will only be passed one argument, that is a table of the ranges (as node ranges) that +-- changed.
+-- +-- @returns The parser +function M.get_parser(bufnr, lang, buf_attach_cbs) if bufnr == nil or bufnr == 0 then bufnr = a.nvim_get_current_buf() end - if ft == nil then - ft = a.nvim_buf_get_option(bufnr, "filetype") + if lang == nil then + lang = a.nvim_buf_get_option(bufnr, "filetype") end - local id = tostring(bufnr)..'_'..ft + local id = tostring(bufnr)..'_'..lang if parsers[id] == nil then - parsers[id] = M.create_parser(bufnr, ft, id) + parsers[id] = M._create_parser(bufnr, lang, id) end if buf_attach_cbs and buf_attach_cbs.on_changedtree then @@ -132,129 +136,4 @@ function M.get_parser(bufnr, ft, buf_attach_cbs) return parsers[id] end --- query: pattern matching on trees --- predicate matching is implemented in lua -local Query = {} -Query.__index = Query - -local magic_prefixes = {['\\v']=true, ['\\m']=true, ['\\M']=true, ['\\V']=true} -local function check_magic(str) - if string.len(str) < 2 or magic_prefixes[string.sub(str,1,2)] then - return str - end - return '\\v'..str -end - -function M.parse_query(lang, query) - M.require_language(lang) - local self = setmetatable({}, Query) - self.query = vim._ts_parse_query(lang, vim.fn.escape(query,'\\')) - self.info = self.query:inspect() - self.captures = self.info.captures - self.regexes = {} - for id,preds in pairs(self.info.patterns) do - local regexes = {} - for i, pred in ipairs(preds) do - if (pred[1] == "match?" 
and type(pred[2]) == "number" - and type(pred[3]) == "string") then - regexes[i] = vim.regex(check_magic(pred[3])) - end - end - if next(regexes) then - self.regexes[id] = regexes - end - end - return self -end - -local function get_node_text(node, bufnr) - local start_row, start_col, end_row, end_col = node:range() - if start_row ~= end_row then - return nil - end - local line = a.nvim_buf_get_lines(bufnr, start_row, start_row+1, true)[1] - return string.sub(line, start_col+1, end_col) -end - -function Query:match_preds(match, pattern, bufnr) - local preds = self.info.patterns[pattern] - if not preds then - return true - end - local regexes = self.regexes[pattern] - for i, pred in pairs(preds) do - -- Here we only want to return if a predicate DOES NOT match, and - -- continue on the other case. This way unknown predicates will not be considered, - -- which allows some testing and easier user extensibility (#12173). - -- Also, tree-sitter strips the leading # from predicates for us. - if pred[1] == "eq?" then - local node = match[pred[2]] - local node_text = get_node_text(node, bufnr) - - local str - if type(pred[3]) == "string" then - -- (#eq? @aa "foo") - str = pred[3] - else - -- (#eq? @aa @bb) - str = get_node_text(match[pred[3]], bufnr) - end - - if node_text ~= str or str == nil then - return false - end - elseif pred[1] == "match?" 
then - if not regexes or not regexes[i] then - return false - end - local node = match[pred[2]] - local start_row, start_col, end_row, end_col = node:range() - if start_row ~= end_row then - return false - end - if not regexes[i]:match_line(bufnr, start_row, start_col, end_col) then - return false - end - end - end - return true -end - -function Query:iter_captures(node, bufnr, start, stop) - if bufnr == 0 then - bufnr = vim.api.nvim_get_current_buf() - end - local raw_iter = node:_rawquery(self.query,true,start,stop) - local function iter() - local capture, captured_node, match = raw_iter() - if match ~= nil then - local active = self:match_preds(match, match.pattern, bufnr) - match.active = active - if not active then - return iter() -- tail call: try next match - end - end - return capture, captured_node - end - return iter -end - -function Query:iter_matches(node, bufnr, start, stop) - if bufnr == 0 then - bufnr = vim.api.nvim_get_current_buf() - end - local raw_iter = node:_rawquery(self.query,false,start,stop) - local function iter() - local pattern, match = raw_iter() - if match ~= nil then - local active = self:match_preds(match, pattern, bufnr) - if not active then - return iter() -- tail call: try next match - end - end - return pattern, match - end - return iter -end - return M diff --git a/runtime/lua/vim/treesitter/highlighter.lua b/runtime/lua/vim/treesitter/highlighter.lua new file mode 100644 index 0000000000..b410f01092 --- /dev/null +++ b/runtime/lua/vim/treesitter/highlighter.lua @@ -0,0 +1,160 @@ +local a = vim.api + +-- support reload for quick experimentation +local TSHighlighter = rawget(vim.treesitter, 'TSHighlighter') or {} +TSHighlighter.__index = TSHighlighter +local ts_hs_ns = a.nvim_create_namespace("treesitter_hl") + +-- These are conventions defined by tree-sitter, though it +-- needs to be user extensible also. 
+-- TODO(bfredl): this is very much incomplete, we will need to +-- go through a few tree-sitter provided queries and decide +-- on translations that makes the most sense. +TSHighlighter.hl_map = { + ["error"] = "Error", + +-- Miscs + ["comment"] = "Comment", + ["punctuation.delimiter"] = "Delimiter", + ["punctuation.bracket"] = "Delimiter", + ["punctuation.special"] = "Delimiter", + +-- Constants + ["constant"] = "Constant", + ["constant.builtin"] = "Special", + ["constant.macro"] = "Define", + ["string"] = "String", + ["string.regex"] = "String", + ["string.escape"] = "SpecialChar", + ["character"] = "Character", + ["number"] = "Number", + ["boolean"] = "Boolean", + ["float"] = "Float", + +-- Functions + ["function"] = "Function", + ["function.special"] = "Function", + ["function.builtin"] = "Special", + ["function.macro"] = "Macro", + ["parameter"] = "Identifier", + ["method"] = "Function", + ["field"] = "Identifier", + ["property"] = "Identifier", + ["constructor"] = "Special", + +-- Keywords + ["conditional"] = "Conditional", + ["repeat"] = "Repeat", + ["label"] = "Label", + ["operator"] = "Operator", + ["keyword"] = "Keyword", + ["exception"] = "Exception", + + ["type"] = "Type", + ["type.builtin"] = "Type", + ["structure"] = "Structure", + ["include"] = "Include", +} + +function TSHighlighter.new(query, bufnr, ft) + local self = setmetatable({}, TSHighlighter) + self.parser = vim.treesitter.get_parser( + bufnr, + ft, + { + on_changedtree = function(...) self:on_changedtree(...) end, + on_lines = function() self.root = self.parser:parse():root() end + } + ) + + self.buf = self.parser.bufnr + + local tree = self.parser:parse() + self.root = tree:root() + self:set_query(query) + self.edit_count = 0 + self.redraw_count = 0 + self.line_count = {} + a.nvim_buf_set_option(self.buf, "syntax", "") + + -- Tricky: if syntax hasn't been enabled, we need to reload color scheme + -- but use synload.vim rather than syntax.vim to not enable + -- syntax FileType autocmds. 
Later on we should integrate with the + -- `:syntax` and `set syntax=...` machinery properly. + if vim.g.syntax_on ~= 1 then + vim.api.nvim_command("runtime! syntax/synload.vim") + end + return self +end + +local function is_highlight_name(capture_name) + local firstc = string.sub(capture_name, 1, 1) + return firstc ~= string.lower(firstc) +end + +function TSHighlighter:get_hl_from_capture(capture) + + local name = self.query.captures[capture] + + if is_highlight_name(name) then + -- From "Normal.left" only keep "Normal" + return vim.split(name, '.', true)[1] + else + -- Default to false to avoid recomputing + return TSHighlighter.hl_map[name] + end +end + +function TSHighlighter:set_query(query) + if type(query) == "string" then + query = vim.treesitter.parse_query(self.parser.lang, query) + elseif query == nil then + query = vim.treesitter.get_query(self.parser.lang, 'highlights') + + if query == nil then + a.nvim_err_writeln("No highlights.scm query found for " .. self.parser.lang) + + if query == nil then + query = vim.treesitter.parse_query(self.parser.lang, "") + end + end + end + + self.query = query + + self.hl_cache = setmetatable({}, { + __index = function(table, capture) + local hl = self:get_hl_from_capture(capture) + rawset(table, capture, hl) + + return hl + end + }) + + self:on_changedtree({{self.root:range()}}) +end + +function TSHighlighter:on_changedtree(changes) + -- Get a fresh root + self.root = self.parser.tree:root() + + for _, ch in ipairs(changes or {}) do + -- Try to be as exact as possible + local changed_node = self.root:descendant_for_range(ch[1], ch[2], ch[3], ch[4]) + + a.nvim_buf_clear_namespace(self.buf, ts_hs_ns, ch[1], ch[3]) + + for capture, node in self.query:iter_captures(changed_node, self.buf, ch[1], ch[3] + 1) do + local start_row, start_col, end_row, end_col = node:range() + local hl = self.hl_cache[capture] + if hl then + a.nvim__buf_add_decoration(self.buf, ts_hs_ns, hl, + start_row, start_col, + end_row, end_col, + {}) + end
+ end + end +end + +return TSHighlighter diff --git a/runtime/lua/vim/treesitter/language.lua b/runtime/lua/vim/treesitter/language.lua new file mode 100644 index 0000000000..b4817de91e --- /dev/null +++ b/runtime/lua/vim/treesitter/language.lua @@ -0,0 +1,26 @@ +local a = vim.api + +local M = {} + +function M.require_language(lang, path) + if vim._ts_has_language(lang) then + return true + end + if path == nil then + local fname = 'parser/' .. lang .. '.*' + local paths = a.nvim_get_runtime_file(fname, false) + if #paths == 0 then + -- TODO(bfredl): help tag? + error("no parser for '"..lang.."' language") + end + path = paths[1] + end + vim._ts_add_language(path, lang) +end + +function M.inspect_language(lang) + M.require_language(lang) + return vim._ts_inspect_language(lang) +end + +return M diff --git a/runtime/lua/vim/treesitter/query.lua b/runtime/lua/vim/treesitter/query.lua new file mode 100644 index 0000000000..914c266426 --- /dev/null +++ b/runtime/lua/vim/treesitter/query.lua @@ -0,0 +1,133 @@ +local a = vim.api +local language = require'vim.treesitter.language' + +-- query: pattern matching on trees +-- predicate matching is implemented in lua +local Query = {} +Query.__index = Query + +local M = {} + +--- Parses a query. 
+-- +-- @param lang The language +-- @param query A string containing the query (s-expr syntax) +-- +-- @returns The query +function M.parse_query(lang, query) + language.require_language(lang) + local self = setmetatable({}, Query) + self.query = vim._ts_parse_query(lang, vim.fn.escape(query,'\\')) + self.info = self.query:inspect() + self.captures = self.info.captures + return self +end + +-- TODO(vigoux): support multiline nodes too +local function get_node_text(node, bufnr) + local start_row, start_col, end_row, end_col = node:range() + if start_row ~= end_row then + return nil + end + local line = a.nvim_buf_get_lines(bufnr, start_row, start_row+1, true)[1] + return string.sub(line, start_col+1, end_col) +end + +-- Predicate handlers receive the following arguments +-- (match, pattern, bufnr, predicate) +local predicate_handlers = { + ["eq?"] = function(match, _, bufnr, predicate) + local node = match[predicate[2]] + local node_text = get_node_text(node, bufnr) + + local str + if type(predicate[3]) == "string" then + -- (#eq? @aa "foo") + str = predicate[3] + else + -- (#eq?
@aa @bb) + str = get_node_text(match[predicate[3]], bufnr) + end + + if node_text ~= str or str == nil then + return false + end + + return true + end, + ["match?"] = function(match, _, bufnr, predicate) + local node = match[predicate[2]] + local regex = predicate[3] + local start_row, _, end_row, _ = node:range() + if start_row ~= end_row then + return false + end + + return string.find(get_node_text(node, bufnr), regex) + end, +} + +function M.add_predicate(name, handler) + if predicate_handlers[name] then + a.nvim_err_writeln("It is recomended to not overwrite predicates.") + end + + predicate_handlers[name] = handler +end + +function Query:match_preds(match, pattern, bufnr) + local preds = self.info.patterns[pattern] + if not preds then + return true + end + for _, pred in pairs(preds) do + -- Here we only want to return if a predicate DOES NOT match, and + -- continue on the other case. This way unknown predicates will not be considered, + -- which allows some testing and easier user extensibility (#12173). + -- Also, tree-sitter strips the leading # from predicates for us. 
+ if predicate_handlers[pred[1]] and + not predicate_handlers[pred[1]](match, pattern, bufnr, pred) then + return false + end + end + return true +end + +function Query:iter_captures(node, bufnr, start, stop) + if bufnr == 0 then + bufnr = vim.api.nvim_get_current_buf() + end + local raw_iter = node:_rawquery(self.query, true, start, stop) + local function iter() + local capture, captured_node, match = raw_iter() + if match ~= nil then + local active = self:match_preds(match, match.pattern, bufnr) + match.active = active + if not active then + return iter() -- tail call: try next match + end + end + return capture, captured_node + end + return iter +end + +function Query:iter_matches(node, bufnr, start, stop) + if bufnr == 0 then + bufnr = vim.api.nvim_get_current_buf() + end + local raw_iter = node:_rawquery(self.query, false, start, stop) + local function iter() + local pattern, match = raw_iter() + if match ~= nil then + local active = self:match_preds(match, pattern, bufnr) + if not active then + return iter() -- tail call: try next match + end + end + return pattern, match + end + return iter +end + +return M diff --git a/runtime/lua/vim/tshighlighter.lua b/runtime/lua/vim/tshighlighter.lua deleted file mode 100644 index 6465751ae8..0000000000 --- a/runtime/lua/vim/tshighlighter.lua +++ /dev/null @@ -1,116 +0,0 @@ -local a = vim.api - --- support reload for quick experimentation -local TSHighlighter = rawget(vim.treesitter, 'TSHighlighter') or {} -TSHighlighter.__index = TSHighlighter -local ts_hs_ns = a.nvim_create_namespace("treesitter_hl") - --- These are conventions defined by tree-sitter, though it --- needs to be user extensible also. --- TODO(bfredl): this is very much incomplete, we will need to --- go through a few tree-sitter provided queries and decide --- on translations that makes the most sense. 
-TSHighlighter.hl_map = { - keyword="Keyword", - string="String", - type="Type", - comment="Comment", - constant="Constant", - operator="Operator", - number="Number", - label="Label", - ["function"]="Function", - ["function.special"]="Function", -} - -function TSHighlighter.new(query, bufnr, ft) - local self = setmetatable({}, TSHighlighter) - self.parser = vim.treesitter.get_parser( - bufnr, - ft, - { - on_changedtree = function(...) self:on_changedtree(...) end, - on_lines = function() self.root = self.parser:parse():root() end - } - ) - - self.buf = self.parser.bufnr - - local tree = self.parser:parse() - self.root = tree:root() - self:set_query(query) - self.edit_count = 0 - self.redraw_count = 0 - self.line_count = {} - a.nvim_buf_set_option(self.buf, "syntax", "") - - -- Tricky: if syntax hasn't been enabled, we need to reload color scheme - -- but use synload.vim rather than syntax.vim to not enable - -- syntax FileType autocmds. Later on we should integrate with the - -- `:syntax` and `set syntax=...` machinery properly. - if vim.g.syntax_on ~= 1 then - vim.api.nvim_command("runtime! 
syntax/synload.vim") - end - return self -end - -local function is_highlight_name(capture_name) - local firstc = string.sub(capture_name, 1, 1) - return firstc ~= string.lower(firstc) -end - -function TSHighlighter:get_hl_from_capture(capture) - - local name = self.query.captures[capture] - - if is_highlight_name(name) then - -- From "Normal.left" only keep "Normal" - return vim.split(name, '.', true)[1] - else - -- Default to false to avoid recomputing - return TSHighlighter.hl_map[name] - end -end - -function TSHighlighter:set_query(query) - if type(query) == "string" then - query = vim.treesitter.parse_query(self.parser.lang, query) - end - self.query = query - - self.hl_cache = setmetatable({}, { - __index = function(table, capture) - local hl = self:get_hl_from_capture(capture) - rawset(table, capture, hl) - - return hl - end - }) - - self:on_changedtree({{self.root:range()}}) -end - -function TSHighlighter:on_changedtree(changes) - -- Get a fresh root - self.root = self.parser.tree:root() - - for _, ch in ipairs(changes or {}) do - -- Try to be as exact as possible - local changed_node = self.root:descendant_for_range(ch[1], ch[2], ch[3], ch[4]) - - a.nvim_buf_clear_namespace(self.buf, ts_hs_ns, ch[1], ch[3]) - - for capture, node in self.query:iter_captures(changed_node, self.buf, ch[1], ch[3] + 1) do - local start_row, start_col, end_row, end_col = node:range() - local hl = self.hl_cache[capture] - if hl then - a.nvim__buf_add_decoration(self.buf, ts_hs_ns, hl, - start_row, start_col, - end_row, end_col, - {}) - end - end - end -end - -return TSHighlighter -- cgit