diff options
author | altermo <unknown> | 2024-01-18 10:02:35 +0100 |
---|---|---|
committer | Lewis Russell <me@lewisr.dev> | 2024-01-19 11:54:04 +0000 |
commit | 9707363b09dbadeb58966d6d45acca17bd74e527 (patch) | |
tree | 57d382cf4e300261124eada5d2a0a40b445fdb95 | |
parent | 0b36cbbafd99e60844faafea026a89a3355a4ac4 (diff) | |
download | rneovim-9707363b09dbadeb58966d6d45acca17bd74e527.tar.gz rneovim-9707363b09dbadeb58966d6d45acca17bd74e527.tar.bz2 rneovim-9707363b09dbadeb58966d6d45acca17bd74e527.zip |
refactor(lua): refactored glob
-rw-r--r-- | runtime/lua/vim/glob.lua | 40 | ||||
-rw-r--r-- | test/functional/lua/glob_spec.lua | 8 |
2 files changed, 23 insertions, 25 deletions
diff --git a/runtime/lua/vim/glob.lua b/runtime/lua/vim/glob.lua index 764200dd36..ad4a915a94 100644 --- a/runtime/lua/vim/glob.lua +++ b/runtime/lua/vim/glob.lua @@ -1,7 +1,11 @@ local lpeg = vim.lpeg +local P, S, V, R, B = lpeg.P, lpeg.S, lpeg.V, lpeg.R, lpeg.B +local C, Cc, Ct, Cf = lpeg.C, lpeg.Cc, lpeg.Ct, lpeg.Cf local M = {} +local pathsep = P('/') + --- Parses a raw glob into an |lua-lpeg| pattern. --- --- This uses glob semantics from LSP 3.17.0: https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#pattern @@ -17,18 +21,8 @@ local M = {} ---@param pattern string The raw glob pattern ---@return vim.lpeg.Pattern pattern An |lua-lpeg| representation of the pattern function M.to_lpeg(pattern) - local l = lpeg - - local P, S, V = lpeg.P, lpeg.S, lpeg.V - local C, Cc, Ct, Cf = lpeg.C, lpeg.Cc, lpeg.Ct, lpeg.Cf - - local pathsep = '/' - local function class(inv, ranges) - for i, r in ipairs(ranges) do - ranges[i] = r[1] .. r[2] - end - local patt = l.R(unpack(ranges)) + local patt = R(unpack(vim.tbl_map(table.concat, ranges))) if inv == '!' then patt = P(1) - patt end @@ -44,11 +38,11 @@ function M.to_lpeg(pattern) end local function star(stars, after) - return (-after * (l.P(1) - pathsep)) ^ #stars * after + return (-after * (P(1) - pathsep)) ^ #stars * after end local function dstar(after) - return (-after * l.P(1)) ^ 0 * after + return (-after * P(1)) ^ 0 * after end local p = P({ @@ -59,11 +53,17 @@ function M.to_lpeg(pattern) * (V('Elem') + V('End')), mul ), - DStar = P('**') * (P(pathsep) * (V('Elem') + V('End')) + V('End')) / dstar, + DStar = (B(pathsep) + -B(P(1))) + * P('**') + * (pathsep * (V('Elem') + V('End')) + V('End')) + / dstar, Star = C(P('*') ^ 1) * (V('Elem') + V('End')) / star, - Ques = P('?') * Cc(l.P(1) - pathsep), - Class = P('[') * C(P('!') ^ -1) * Ct(Ct(C(1) * '-' * C(P(1) - ']')) ^ 1 * ']') / class, - CondList = P('{') * Cf(V('Cond') * (P(',') * V('Cond')) ^ 0, add) * '}', + Ques = P('?') * Cc(P(1) - pathsep), + Class = P('[') + * C(P('!') ^ -1) + * Ct(Ct(C(P(1)) * P('-') * C(P(1) - P(']'))) ^ 1 * P(']')) + / class, + CondList = P('{') * Cf(V('Cond') * (P(',') * V('Cond')) ^ 0, add) * P('}'), -- TODO: '*' inside a {} condition is interpreted literally but should probably have the same -- wildcard semantics it usually has. -- Fixing this is non-trivial because '*' should match non-greedily up to "the rest of the @@ -71,9 +71,9 @@ function M.to_lpeg(pattern) -- condition means "everything after the {}" where several other options separated by ',' may -- exist in between that should not be matched by '*'. Cond = Cf((V('Ques') + V('Class') + V('CondList') + (V('Literal') - S(',}'))) ^ 1, mul) - + Cc(l.P(0)), - Literal = P(1) / l.P, - End = P(-1) * Cc(l.P(-1)), + + Cc(P(0)), + Literal = P(1) / P, + End = P(-1) * Cc(P(-1)), }) local lpeg_pattern = p:match(pattern) --[[@as vim.lpeg.Pattern?]] diff --git a/test/functional/lua/glob_spec.lua b/test/functional/lua/glob_spec.lua index c7ef498008..1eac037575 100644 --- a/test/functional/lua/glob_spec.lua +++ b/test/functional/lua/glob_spec.lua @@ -67,18 +67,16 @@ describe('glob', function() eq(true, match('dir/*/file.txt', 'dir/subdir/file.txt')) eq(false, match('dir/*/file.txt', 'dir/subdir/subdir/file.txt')) - -- TODO: The spec does not describe this, but VSCode only interprets ** when it's by + -- The spec does not describe this, but VSCode only interprets ** when it's by -- itself in a path segment, and otherwise interprets ** as consecutive * directives. - -- The following tests show how this behavior should work, but is not yet fully implemented. - -- Currently, "a**" parses incorrectly as "a" "**" and "**a" parses correctly as "*" "*" "a". -- see: https://github.com/microsoft/vscode/blob/eef30e7165e19b33daa1e15e92fa34ff4a5df0d3/src/vs/base/common/glob.ts#L112 eq(true, match('a**', 'abc')) -- '**' should parse as two '*'s when not by itself in a path segment eq(true, match('**c', 'abc')) - -- eq(false, match('a**', 'ab')) -- each '*' should still represent at least one character + eq(false, match('a**', 'ab')) -- each '*' should still represent at least one character eq(false, match('**c', 'bc')) eq(true, match('a**', 'abcd')) eq(true, match('**d', 'abcd')) - -- eq(false, match('a**', 'abc/d')) + eq(false, match('a**', 'abc/d')) eq(false, match('**d', 'abc/d')) end) |