aboutsummaryrefslogtreecommitdiff
path: root/runtime/lua/vim/glob.lua
diff options
context:
space:
mode:
Diffstat (limited to 'runtime/lua/vim/glob.lua')
-rw-r--r--runtime/lua/vim/glob.lua84
1 files changed, 84 insertions, 0 deletions
diff --git a/runtime/lua/vim/glob.lua b/runtime/lua/vim/glob.lua
new file mode 100644
index 0000000000..ad4a915a94
--- /dev/null
+++ b/runtime/lua/vim/glob.lua
@@ -0,0 +1,84 @@
+local lpeg = vim.lpeg
+local P, S, V, R, B = lpeg.P, lpeg.S, lpeg.V, lpeg.R, lpeg.B
+local C, Cc, Ct, Cf = lpeg.C, lpeg.Cc, lpeg.Ct, lpeg.Cf
+
+local M = {}
+
+local pathsep = P('/')
+
+--- Parses a raw glob into an |lua-lpeg| pattern.
+---
+--- This uses glob semantics from LSP 3.17.0: https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#pattern
+---
+--- Glob patterns can have the following syntax:
+--- - `*` to match one or more characters in a path segment
+--- - `?` to match on one character in a path segment
+--- - `**` to match any number of path segments, including none
+--- - `{}` to group conditions (e.g. `*.{ts,js}` matches TypeScript and JavaScript files)
+--- - `[]` to declare a range of characters to match in a path segment (e.g., `example.[0-9]` to match on `example.0`, `example.1`, …)
+--- - `[!...]` to negate a range of characters to match in a path segment (e.g., `example.[!0-9]` to match on `example.a`, `example.b`, but not `example.0`)
+---
+---@param pattern string The raw glob pattern
+---@return vim.lpeg.Pattern pattern An |lua-lpeg| representation of the pattern
+function M.to_lpeg(pattern)
+ local function class(inv, ranges)
+ local patt = R(unpack(vim.tbl_map(table.concat, ranges)))
+ if inv == '!' then
+ patt = P(1) - patt
+ end
+ return patt
+ end
+
+ local function add(acc, a)
+ return acc + a
+ end
+
+ local function mul(acc, m)
+ return acc * m
+ end
+
+ local function star(stars, after)
+ return (-after * (P(1) - pathsep)) ^ #stars * after
+ end
+
+ local function dstar(after)
+ return (-after * P(1)) ^ 0 * after
+ end
+
+ local p = P({
+ 'Pattern',
+ Pattern = V('Elem') ^ -1 * V('End'),
+ Elem = Cf(
+ (V('DStar') + V('Star') + V('Ques') + V('Class') + V('CondList') + V('Literal'))
+ * (V('Elem') + V('End')),
+ mul
+ ),
+ DStar = (B(pathsep) + -B(P(1)))
+ * P('**')
+ * (pathsep * (V('Elem') + V('End')) + V('End'))
+ / dstar,
+ Star = C(P('*') ^ 1) * (V('Elem') + V('End')) / star,
+ Ques = P('?') * Cc(P(1) - pathsep),
+ Class = P('[')
+ * C(P('!') ^ -1)
+ * Ct(Ct(C(P(1)) * P('-') * C(P(1) - P(']'))) ^ 1 * P(']'))
+ / class,
+ CondList = P('{') * Cf(V('Cond') * (P(',') * V('Cond')) ^ 0, add) * P('}'),
+ -- TODO: '*' inside a {} condition is interpreted literally but should probably have the same
+ -- wildcard semantics it usually has.
+ -- Fixing this is non-trivial because '*' should match non-greedily up to "the rest of the
+ -- pattern" which in all other cases is the entire succeeding part of the pattern, but at the end of a {}
+ -- condition means "everything after the {}" where several other options separated by ',' may
+ -- exist in between that should not be matched by '*'.
+ Cond = Cf((V('Ques') + V('Class') + V('CondList') + (V('Literal') - S(',}'))) ^ 1, mul)
+ + Cc(P(0)),
+ Literal = P(1) / P,
+ End = P(-1) * Cc(P(-1)),
+ })
+
+ local lpeg_pattern = p:match(pattern) --[[@as vim.lpeg.Pattern?]]
+ assert(lpeg_pattern, 'Invalid glob')
+ return lpeg_pattern
+end
+
+return M