aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author altermo <unknown> 2024-01-18 10:02:35 +0100
committer Lewis Russell <me@lewisr.dev> 2024-01-19 11:54:04 +0000
commit 9707363b09dbadeb58966d6d45acca17bd74e527 (patch)
tree 57d382cf4e300261124eada5d2a0a40b445fdb95
parent 0b36cbbafd99e60844faafea026a89a3355a4ac4 (diff)
download rneovim-9707363b09dbadeb58966d6d45acca17bd74e527.tar.gz
rneovim-9707363b09dbadeb58966d6d45acca17bd74e527.tar.bz2
rneovim-9707363b09dbadeb58966d6d45acca17bd74e527.zip
refactor(lua): refactored glob
-rw-r--r-- runtime/lua/vim/glob.lua 40
-rw-r--r-- test/functional/lua/glob_spec.lua 8
2 files changed, 23 insertions, 25 deletions
diff --git a/runtime/lua/vim/glob.lua b/runtime/lua/vim/glob.lua
index 764200dd36..ad4a915a94 100644
--- a/runtime/lua/vim/glob.lua
+++ b/runtime/lua/vim/glob.lua
@@ -1,7 +1,11 @@
local lpeg = vim.lpeg
+local P, S, V, R, B = lpeg.P, lpeg.S, lpeg.V, lpeg.R, lpeg.B
+local C, Cc, Ct, Cf = lpeg.C, lpeg.Cc, lpeg.Ct, lpeg.Cf
local M = {}
+local pathsep = P('/')
+
--- Parses a raw glob into an |lua-lpeg| pattern.
---
--- This uses glob semantics from LSP 3.17.0: https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#pattern
@@ -17,18 +21,8 @@ local M = {}
---@param pattern string The raw glob pattern
---@return vim.lpeg.Pattern pattern An |lua-lpeg| representation of the pattern
function M.to_lpeg(pattern)
- local l = lpeg
-
- local P, S, V = lpeg.P, lpeg.S, lpeg.V
- local C, Cc, Ct, Cf = lpeg.C, lpeg.Cc, lpeg.Ct, lpeg.Cf
-
- local pathsep = '/'
-
local function class(inv, ranges)
- for i, r in ipairs(ranges) do
- ranges[i] = r[1] .. r[2]
- end
- local patt = l.R(unpack(ranges))
+ local patt = R(unpack(vim.tbl_map(table.concat, ranges)))
if inv == '!' then
patt = P(1) - patt
end
@@ -44,11 +38,11 @@ function M.to_lpeg(pattern)
end
local function star(stars, after)
- return (-after * (l.P(1) - pathsep)) ^ #stars * after
+ return (-after * (P(1) - pathsep)) ^ #stars * after
end
local function dstar(after)
- return (-after * l.P(1)) ^ 0 * after
+ return (-after * P(1)) ^ 0 * after
end
local p = P({
@@ -59,11 +53,17 @@ function M.to_lpeg(pattern)
* (V('Elem') + V('End')),
mul
),
- DStar = P('**') * (P(pathsep) * (V('Elem') + V('End')) + V('End')) / dstar,
+ DStar = (B(pathsep) + -B(P(1)))
+ * P('**')
+ * (pathsep * (V('Elem') + V('End')) + V('End'))
+ / dstar,
Star = C(P('*') ^ 1) * (V('Elem') + V('End')) / star,
- Ques = P('?') * Cc(l.P(1) - pathsep),
- Class = P('[') * C(P('!') ^ -1) * Ct(Ct(C(1) * '-' * C(P(1) - ']')) ^ 1 * ']') / class,
- CondList = P('{') * Cf(V('Cond') * (P(',') * V('Cond')) ^ 0, add) * '}',
+ Ques = P('?') * Cc(P(1) - pathsep),
+ Class = P('[')
+ * C(P('!') ^ -1)
+ * Ct(Ct(C(P(1)) * P('-') * C(P(1) - P(']'))) ^ 1 * P(']'))
+ / class,
+ CondList = P('{') * Cf(V('Cond') * (P(',') * V('Cond')) ^ 0, add) * P('}'),
-- TODO: '*' inside a {} condition is interpreted literally but should probably have the same
-- wildcard semantics it usually has.
-- Fixing this is non-trivial because '*' should match non-greedily up to "the rest of the
@@ -71,9 +71,9 @@ function M.to_lpeg(pattern)
-- condition means "everything after the {}" where several other options separated by ',' may
-- exist in between that should not be matched by '*'.
Cond = Cf((V('Ques') + V('Class') + V('CondList') + (V('Literal') - S(',}'))) ^ 1, mul)
- + Cc(l.P(0)),
- Literal = P(1) / l.P,
- End = P(-1) * Cc(l.P(-1)),
+ + Cc(P(0)),
+ Literal = P(1) / P,
+ End = P(-1) * Cc(P(-1)),
})
local lpeg_pattern = p:match(pattern) --[[@as vim.lpeg.Pattern?]]
diff --git a/test/functional/lua/glob_spec.lua b/test/functional/lua/glob_spec.lua
index c7ef498008..1eac037575 100644
--- a/test/functional/lua/glob_spec.lua
+++ b/test/functional/lua/glob_spec.lua
@@ -67,18 +67,16 @@ describe('glob', function()
eq(true, match('dir/*/file.txt', 'dir/subdir/file.txt'))
eq(false, match('dir/*/file.txt', 'dir/subdir/subdir/file.txt'))
- -- TODO: The spec does not describe this, but VSCode only interprets ** when it's by
+ -- The spec does not describe this, but VSCode only interprets ** when it's by
-- itself in a path segment, and otherwise interprets ** as consecutive * directives.
- -- The following tests show how this behavior should work, but is not yet fully implemented.
- -- Currently, "a**" parses incorrectly as "a" "**" and "**a" parses correctly as "*" "*" "a".
-- see: https://github.com/microsoft/vscode/blob/eef30e7165e19b33daa1e15e92fa34ff4a5df0d3/src/vs/base/common/glob.ts#L112
eq(true, match('a**', 'abc')) -- '**' should parse as two '*'s when not by itself in a path segment
eq(true, match('**c', 'abc'))
- -- eq(false, match('a**', 'ab')) -- each '*' should still represent at least one character
+ eq(false, match('a**', 'ab')) -- each '*' should still represent at least one character
eq(false, match('**c', 'bc'))
eq(true, match('a**', 'abcd'))
eq(true, match('**d', 'abcd'))
- -- eq(false, match('a**', 'abc/d'))
+ eq(false, match('a**', 'abc/d'))
eq(false, match('**d', 'abc/d'))
end)