aboutsummaryrefslogtreecommitdiff
path: root/runtime/lua/vim/glob.lua
blob: 4f86d5e1caeafca0ff387141d808b58494386aa7 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
local lpeg = vim.lpeg
local P, S, V, R, B = lpeg.P, lpeg.S, lpeg.V, lpeg.R, lpeg.B
local C, Cc, Ct, Cf, Cmt = lpeg.C, lpeg.Cc, lpeg.Ct, lpeg.Cf, lpeg.Cmt

local M = {}

local pathsep = P('/')

--- Parses a raw glob into an |lua-lpeg| pattern.
---
--- This uses glob semantics from LSP 3.17.0: https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#pattern
---
--- Glob patterns can have the following syntax:
--- - `*` to match one or more characters in a path segment
--- - `?` to match on one character in a path segment
--- - `**` to match any number of path segments, including none
--- - `{}` to group conditions (e.g. `*.{ts,js}` matches TypeScript and JavaScript files)
--- - `[]` to declare a range of characters to match in a path segment (e.g., `example.[0-9]` to match on `example.0`, `example.1`, …)
--- - `[!...]` to negate a range of characters to match in a path segment (e.g., `example.[!0-9]` to match on `example.a`, `example.b`, but not `example.0`)
---
---@param pattern string The raw glob pattern
---@return vim.lpeg.Pattern pattern An |lua-lpeg| representation of the pattern
function M.to_lpeg(pattern)
  local function class(inv, ranges)
    local patt = R(unpack(vim.tbl_map(table.concat, ranges)))
    if inv == '!' then
      patt = P(1) - patt
    end
    return patt
  end

  local function condlist(conds, after)
    return vim.iter(conds):fold(P(false), function(acc, cond)
      return acc + cond * after
    end)
  end

  local function mul(acc, m)
    return acc * m
  end

  local function star(stars, after)
    return (-after * (P(1) - pathsep)) ^ #stars * after
  end

  local function dstar(after)
    return (-after * P(1)) ^ 0 * after
  end

  -- luacheck: push ignore s
  local function cut(_s, idx, match)
    return idx, match
  end
  -- luacheck: pop

  local p = P({
    'Pattern',
    Pattern = V('Elem') ^ -1 * V('End'),
    Elem = Cmt(
      Cf(
        (V('DStar') + V('Star') + V('Ques') + V('Class') + V('CondList') + V('Literal'))
          * (V('Elem') + V('End')),
        mul
      ),
      cut
    ),
    DStar = (B(pathsep) + -B(P(1)))
      * P('**')
      * (pathsep * (V('Elem') + V('End')) + V('End'))
      / dstar,
    Star = C(P('*') ^ 1) * (V('Elem') + V('End')) / star,
    Ques = P('?') * Cc(P(1) - pathsep),
    Class = P('[')
      * C(P('!') ^ -1)
      * Ct(Ct(C(P(1)) * P('-') * C(P(1) - P(']'))) ^ 1 * P(']'))
      / class,
    CondList = P('{') * Ct(V('Cond') * (P(',') * V('Cond')) ^ 0) * P('}') * V('Pattern') / condlist,
    -- TODO: '*' inside a {} condition is interpreted literally but should probably have the same
    -- wildcard semantics it usually has.
    -- Fixing this is non-trivial because '*' should match non-greedily up to "the rest of the
    -- pattern" which in all other cases is the entire succeeding part of the pattern, but at the end of a {}
    -- condition means "everything after the {}" where several other options separated by ',' may
    -- exist in between that should not be matched by '*'.
    Cond = Cmt(Cf((V('Ques') + V('Class') + V('Literal') - S(',}')) ^ 1, mul), cut) + Cc(P(0)),
    Literal = P(1) / P,
    End = P(-1) * Cc(P(-1)),
  })

  local lpeg_pattern = p:match(pattern) --[[@as vim.lpeg.Pattern?]]
  assert(lpeg_pattern, 'Invalid glob')
  return lpeg_pattern
end

return M