diff options
author | Jon Huhn <nojnhuh@users.noreply.github.com> | 2023-06-05 00:19:31 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-06-05 07:19:31 +0200 |
commit | 416fe8d185dcc072df7942953d867d9c605e9ffd (patch) | |
tree | b2755eeb5b7bdfb87c1413490195fa486ba31cb6 | |
parent | 3c6d971e5488dc75b7db07c14d01f87827f28a67 (diff) | |
download | rneovim-416fe8d185dcc072df7942953d867d9c605e9ffd.tar.gz rneovim-416fe8d185dcc072df7942953d867d9c605e9ffd.tar.bz2 rneovim-416fe8d185dcc072df7942953d867d9c605e9ffd.zip |
refactor(lsp): use LPeg for watchfiles matching (#23788)
-rw-r--r-- | runtime/lua/vim/lsp/_watchfiles.lua | 191 | ||||
-rw-r--r-- | test/functional/plugin/lsp/watchfiles_spec.lua | 91 |
2 files changed, 126 insertions, 156 deletions
diff --git a/runtime/lua/vim/lsp/_watchfiles.lua b/runtime/lua/vim/lsp/_watchfiles.lua index cf2c57db1f..14e5dc6cf8 100644 --- a/runtime/lua/vim/lsp/_watchfiles.lua +++ b/runtime/lua/vim/lsp/_watchfiles.lua @@ -1,152 +1,81 @@ local bit = require('bit') +local lpeg = require('lpeg') local watch = require('vim._watch') local protocol = require('vim.lsp.protocol') local M = {} ---@private ----Parses the raw pattern into a number of Lua-native patterns. +--- Parses the raw pattern into an |lpeg| pattern. LPeg patterns natively support the "this" or "that" +--- alternative constructions described in the LSP spec that cannot be expressed in a standard Lua pattern. --- ---@param pattern string The raw glob pattern ----@return table A list of Lua patterns. A match with any of them matches the input glob pattern. +---@return userdata An |lpeg| representation of the pattern, or nil if the pattern is invalid. local function parse(pattern) - local patterns = { '' } + local l = lpeg - local path_sep = '[/\\]' - local non_path_sep = '[^/\\]' + local P, S, V = lpeg.P, lpeg.S, lpeg.V + local C, Cc, Ct, Cf = lpeg.C, lpeg.Cc, lpeg.Ct, lpeg.Cf - local function append(chunks) - local new_patterns = {} - for _, p in ipairs(patterns) do - for _, chunk in ipairs(chunks) do - table.insert(new_patterns, p .. chunk) - end - end - patterns = new_patterns - end + local pathsep = '/' - local function split(s, sep) - local segments = {} - local segment = '' - local in_braces = false - local in_brackets = false - for i = 1, #s do - local c = string.sub(s, i, i) - if c == sep and not in_braces and not in_brackets then - table.insert(segments, segment) - segment = '' - else - if c == '{' then - in_braces = true - elseif c == '}' then - in_braces = false - elseif c == '[' then - in_brackets = true - elseif c == ']' then - in_brackets = false - end - segment = segment .. c - end + local function class(inv, ranges) + for i, r in ipairs(ranges) do + ranges[i] = r[1] .. r[2] end - if segment ~= '' then - table.insert(segments, segment) + local patt = l.R(unpack(ranges)) + if inv == '!' then + patt = P(1) - patt end - return segments + return patt end - local function escape(c) - if - c == '?' - or c == '.' - or c == '(' - or c == ')' - or c == '%' - or c == '[' - or c == ']' - or c == '*' - or c == '+' - or c == '-' - then - return '%' .. c - end - return c + local function add(acc, a) + return acc + a end - local segments = split(pattern, '/') - for i, segment in ipairs(segments) do - local last_seg = i == #segments - if segment == '**' then - local chunks = { - path_sep .. '-', - '.-' .. path_sep, - } - if last_seg then - chunks = { '.-' } - end - append(chunks) - else - local in_braces = false - local brace_val = '' - local in_brackets = false - local bracket_val = '' - for j = 1, #segment do - local char = string.sub(segment, j, j) - if char ~= '}' and in_braces then - brace_val = brace_val .. char - else - if in_brackets and (char ~= ']' or bracket_val == '') then - local res - if char == '-' then - res = char - elseif bracket_val == '' and char == '!' then - res = '^' - elseif char == '/' then - res = '' - else - res = escape(char) - end - bracket_val = bracket_val .. res - else - if char == '{' then - in_braces = true - elseif char == '[' then - in_brackets = true - elseif char == '}' then - local choices = split(brace_val, ',') - local parsed_choices = {} - for _, choice in ipairs(choices) do - table.insert(parsed_choices, parse(choice)) - end - append(vim.tbl_flatten(parsed_choices)) - in_braces = false - brace_val = '' - elseif char == ']' then - append({ '[' .. bracket_val .. ']' }) - in_brackets = false - bracket_val = '' - elseif char == '?' then - append({ non_path_sep }) - elseif char == '*' then - append({ non_path_sep .. '-' }) - else - append({ escape(char) }) - end - end - end - end + local function mul(acc, m) + return acc * m + end - if not last_seg and (segments[i + 1] ~= '**' or i + 1 < #segments) then - append({ path_sep }) - end - end + local function star(stars, after) + return (-after * (l.P(1) - pathsep)) ^ #stars * after end - return patterns + local function dstar(after) + return (-after * l.P(1)) ^ 0 * after + end + + local p = P({ + 'Pattern', + Pattern = V('Elem') ^ -1 * V('End'), + Elem = Cf( + (V('DStar') + V('Star') + V('Ques') + V('Class') + V('CondList') + V('Literal')) + * (V('Elem') + V('End')), + mul + ), + DStar = P('**') * (P(pathsep) * (V('Elem') + V('End')) + V('End')) / dstar, + Star = C(P('*') ^ 1) * (V('Elem') + V('End')) / star, + Ques = P('?') * Cc(l.P(1) - pathsep), + Class = P('[') * C(P('!') ^ -1) * Ct(Ct(C(1) * '-' * C(P(1) - ']')) ^ 1 * ']') / class, + CondList = P('{') * Cf(V('Cond') * (P(',') * V('Cond')) ^ 0, add) * '}', + -- TODO: '*' inside a {} condition is interpreted literally but should probably have the same + -- wildcard semantics it usually has. + -- Fixing this is non-trivial because '*' should match non-greedily up to "the rest of the + -- pattern" which in all other cases is the entire succeeding part of the pattern, but at the end of a {} + -- condition means "everything after the {}" where several other options separated by ',' may + -- exist in between that should not be matched by '*'. + Cond = Cf((V('Ques') + V('Class') + V('CondList') + (V('Literal') - S(',}'))) ^ 1, mul) + + Cc(l.P(0)), + Literal = P(1) / l.P, + End = P(-1) * Cc(l.P(-1)), + }) + + return p:match(pattern) end ---@private --- Implementation of LSP 3.17.0's pattern matching: https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#pattern ---- Modeled after VSCode's implementation: https://github.com/microsoft/vscode/blob/0319eed971719ad48e9093daba9d65a5013ec5ab/src/vs/base/common/glob.ts#L509 --- ---@param pattern string|table The glob pattern (raw or parsed) to match. ---@param s string The string to match against pattern. @@ -155,15 +84,7 @@ function M._match(pattern, s) if type(pattern) == 'string' then pattern = parse(pattern) end - -- Since Lua's built-in string pattern matching does not have an alternate - -- operator like '|', `parse` will construct one pattern for each possible - -- alternative. Any pattern that matches thus matches the glob. - for _, p in ipairs(pattern) do - if s:match('^' .. p .. '$') then - return true - end - end - return false + return pattern:match(s) ~= nil end M._watchfunc = (vim.fn.has('win32') == 1 or vim.fn.has('mac') == 1) and watch.watch or watch.poll @@ -226,11 +147,11 @@ function M.register(reg, ctx) local kind = w.kind or protocol.WatchKind.Create + protocol.WatchKind.Change + protocol.WatchKind.Delete - local pattern = glob_pattern.pattern + local pattern = parse(glob_pattern.pattern) + assert(pattern, 'invalid pattern: ' .. glob_pattern.pattern) if relative_pattern then - pattern = base_dir .. '/' .. pattern + pattern = lpeg.P(base_dir .. '/') * pattern end - pattern = parse(pattern) table.insert(watch_regs, { base_dir = base_dir, diff --git a/test/functional/plugin/lsp/watchfiles_spec.lua b/test/functional/plugin/lsp/watchfiles_spec.lua index c5d6803a7f..a8260e0c98 100644 --- a/test/functional/plugin/lsp/watchfiles_spec.lua +++ b/test/functional/plugin/lsp/watchfiles_spec.lua @@ -2,7 +2,6 @@ local helpers = require('test.functional.helpers')(after_each) local eq = helpers.eq local exec_lua = helpers.exec_lua -local has_err = require('luassert').has.errors describe('vim.lsp._watchfiles', function() before_each(helpers.clear) @@ -17,21 +16,31 @@ describe('vim.lsp._watchfiles', function() eq(true, match('', '')) eq(false, match('', 'a')) eq(true, match('a', 'a')) + eq(true, match('/', '/')) eq(true, match('abc', 'abc')) eq(false, match('abc', 'abcdef')) eq(false, match('abc', 'a')) + eq(false, match('abc', 'bc')) eq(false, match('a', 'b')) eq(false, match('.', 'a')) eq(true, match('$', '$')) + eq(true, match('/dir', '/dir')) + eq(true, match('dir/', 'dir/')) + eq(true, match('dir/subdir', 'dir/subdir')) + eq(false, match('dir/subdir', 'subdir')) eq(false, match('dir/subdir', 'dir/subdir/file')) + eq(true, match('🤠', '🤠')) end) it('should match * wildcards', function() - -- eq(false, match('*', '')) -- TODO: this fails + eq(false, match('*', '')) eq(true, match('*', 'a')) + eq(false, match('*', '/')) eq(false, match('*', '/a')) eq(false, match('*', 'a/')) eq(true, match('*', 'aaa')) + eq(true, match('*a', 'aa')) + eq(true, match('*a', 'abca')) eq(true, match('*.txt', 'file.txt')) eq(false, match('*.txt', 'file.txtxt')) eq(false, match('*.txt', 'dir/file.txt')) @@ -40,9 +49,31 @@ describe('vim.lsp._watchfiles', function() eq(false, match('*.dir', 'test.dir/file')) eq(true, match('file.*', 'file.txt')) eq(false, match('file.*', 'not-file.txt')) + eq(true, match('*/file.txt', 'dir/file.txt')) + eq(false, match('*/file.txt', 'dir/subdir/file.txt')) + eq(false, match('*/file.txt', '/dir/file.txt')) + eq(true, match('dir/*', 'dir/file.txt')) + eq(false, match('dir/*', 'dir')) eq(false, match('dir/*.txt', 'file.txt')) eq(true, match('dir/*.txt', 'dir/file.txt')) eq(false, match('dir/*.txt', 'dir/subdir/file.txt')) + eq(false, match('dir/*/file.txt', 'dir/file.txt')) + eq(true, match('dir/*/file.txt', 'dir/subdir/file.txt')) + eq(false, match('dir/*/file.txt', 'dir/subdir/subdir/file.txt')) + + -- TODO: The spec does not describe this, but VSCode only interprets ** when it's by + -- itself in a path segment, and otherwise interprets ** as consecutive * directives. + -- The following tests show how this behavior should work, but is not yet fully implemented. + -- Currently, "a**" parses incorrectly as "a" "**" and "**a" parses correctly as "*" "*" "a". + -- see: https://github.com/microsoft/vscode/blob/eef30e7165e19b33daa1e15e92fa34ff4a5df0d3/src/vs/base/common/glob.ts#L112 + eq(true, match('a**', 'abc')) -- '**' should parse as two '*'s when not by itself in a path segment + eq(true, match('**c', 'abc')) + -- eq(false, match('a**', 'ab')) -- each '*' should still represent at least one character + eq(false, match('**c', 'bc')) + eq(true, match('a**', 'abcd')) + eq(true, match('**d', 'abcd')) + -- eq(false, match('a**', 'abc/d')) + eq(false, match('**d', 'abc/d')) end) it('should match ? wildcards', function() @@ -58,52 +89,64 @@ describe('vim.lsp._watchfiles', function() it('should match ** wildcards', function() eq(true, match('**', '')) eq(true, match('**', 'a')) + eq(true, match('**', '/')) eq(true, match('**', 'a/')) eq(true, match('**', '/a')) eq(true, match('**', 'C:/a')) eq(true, match('**', 'a/a')) eq(true, match('**', 'a/a/a')) - eq(false, match('a**', '')) - eq(true, match('a**', 'a')) - eq(true, match('a**', 'abcd')) - eq(false, match('a**', 'ba')) - eq(false, match('a**', 'a/b')) - eq(false, match('**a', '')) - eq(true, match('**a', 'a')) - eq(true, match('**a', 'dcba')) - eq(false, match('**a', 'ab')) - eq(false, match('**a', 'b/a')) + eq(false, match('/**', '')) -- /** matches leading / literally + eq(true, match('/**', '/')) + eq(true, match('/**', '/a/b/c')) + eq(true, match('**/', '')) -- **/ absorbs trailing / + eq(true, match('**/', '/a/b/c')) + eq(true, match('**/**', '')) + eq(true, match('**/**', 'a')) eq(false, match('a/**', '')) - eq(true, match('a/**', 'a')) + eq(false, match('a/**', 'a')) eq(true, match('a/**', 'a/b')) + eq(true, match('a/**', 'a/b/c')) eq(false, match('a/**', 'b/a')) eq(false, match('a/**', '/a')) eq(false, match('**/a', '')) eq(true, match('**/a', 'a')) eq(false, match('**/a', 'a/b')) eq(true, match('**/a', '/a')) + eq(true, match('**/a', '/b/a')) + eq(true, match('**/a', '/c/b/a')) + eq(true, match('**/a', '/a/a')) + eq(true, match('**/a', '/abc/a')) eq(false, match('a/**/c', 'a')) eq(false, match('a/**/c', 'c')) eq(true, match('a/**/c', 'a/c')) eq(true, match('a/**/c', 'a/b/c')) eq(true, match('a/**/c', 'a/b/b/c')) - eq(true, match('**/a/**', 'a')) - eq(true, match('**/a/**', '/dir/a')) + eq(false, match('**/a/**', 'a')) + eq(true, match('**/a/**', 'a/')) + eq(false, match('**/a/**', '/dir/a')) + eq(false, match('**/a/**', 'dir/a')) + eq(true, match('**/a/**', 'dir/a/')) eq(true, match('**/a/**', 'a/dir')) eq(true, match('**/a/**', 'dir/a/dir')) eq(true, match('**/a/**', '/a/dir')) eq(true, match('**/a/**', 'C:/a/dir')) - -- eq(false, match('**/a/**', 'a.txt')) -- TODO: this fails + eq(false, match('**/a/**', 'a.txt')) end) it('should match {} groups', function() - eq(false, match('{}', '')) - eq(true, match('{,}', '')) + eq(true, match('{}', '')) eq(false, match('{}', 'a')) + eq(true, match('a{}', 'a')) + eq(true, match('{}a', 'a')) + eq(true, match('{,}', '')) + eq(true, match('{a,}', '')) + eq(true, match('{a,}', 'a')) eq(true, match('{a}', 'a')) eq(false, match('{a}', 'aa')) eq(false, match('{a}', 'ab')) + eq(true, match('{a?c}', 'abc')) eq(false, match('{ab}', 'a')) + eq(false, match('{ab}', 'b')) eq(true, match('{ab}', 'ab')) eq(true, match('{a,b}', 'a')) eq(true, match('{a,b}', 'b')) @@ -112,11 +155,11 @@ describe('vim.lsp._watchfiles', function() eq(false, match('{ab,cd}', 'a')) eq(true, match('{ab,cd}', 'cd')) eq(true, match('{a,b,c}', 'c')) - eq(false, match('{a,{b,c}}', 'c')) -- {} can't nest + eq(true, match('{a,{b,c}}', 'c')) end) it('should match [] groups', function() - eq(true, match('[]', '')) + eq(true, match('[]', '[]')) -- empty [] is a literal eq(false, match('[a-z]', '')) eq(true, match('[a-z]', 'a')) eq(false, match('[a-z]', 'ab')) @@ -141,7 +184,7 @@ describe('vim.lsp._watchfiles', function() end) it('should match [!...] groups', function() - has_err(function() match('[!]', '') end) -- not a valid pattern + eq(true, match('[!]', '[!]')) -- [!] is a literal eq(false, match('[!a-z]', '')) eq(false, match('[!a-z]', 'a')) eq(false, match('[!a-z]', 'z')) @@ -159,11 +202,17 @@ describe('vim.lsp._watchfiles', function() it('should match complex patterns', function() eq(false, match('**/*.{c,h}', '')) eq(false, match('**/*.{c,h}', 'c')) + eq(false, match('**/*.{c,h}', 'file.m')) eq(true, match('**/*.{c,h}', 'file.c')) eq(true, match('**/*.{c,h}', 'file.h')) eq(true, match('**/*.{c,h}', '/file.c')) eq(true, match('**/*.{c,h}', 'dir/subdir/file.c')) eq(true, match('**/*.{c,h}', 'dir/subdir/file.h')) + eq(true, match('**/*.{c,h}', '/dir/subdir/file.c')) + eq(true, match('**/*.{c,h}', 'C:/dir/subdir/file.c')) + eq(true, match('/dir/**/*.{c,h}', '/dir/file.c')) + eq(false, match('/dir/**/*.{c,h}', 'dir/file.c')) + eq(true, match('/dir/**/*.{c,h}', '/dir/subdir/subdir/file.c')) eq(true, match('{[0-9],[a-z]}', '0')) eq(true, match('{[0-9],[a-z]}', 'a')) |