aboutsummaryrefslogtreecommitdiff
path: root/src/nvim/generators/hashy.lua
blob: b10bafb9f90d684a845c6553b2740fc88c75df22 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
-- HASHY McHASHFACE

local M = {}
_G.d = M


local function setdefault(table, key)
  local val = table[key]
  if val == nil then
    val = {}
    table[key] = val
  end
  return val
end

function M.build_pos_hash(strings)
  local len_buckets = {}
  local maxlen = 0
  for _,s in ipairs(strings) do
    table.insert(setdefault(len_buckets, #s),s)
    if #s > maxlen then maxlen = #s end
  end

  local len_pos_buckets = {}
  local worst_buck_size = 0

  for len = 1,maxlen do
    local strs = len_buckets[len]
    if strs then
      -- the best position so far generates `best_bucket`
      -- with `minsize` worst case collisions
      local bestpos, minsize, best_bucket = nil, #strs*2, nil
      for pos = 1,len do
        local try_bucket = {}
        for _,str in ipairs(strs) do
          local poschar = string.sub(str, pos, pos)
          table.insert(setdefault(try_bucket, poschar), str)
        end
        local maxsize = 1
        for _,pos_strs in pairs(try_bucket) do
          maxsize = math.max(maxsize, #pos_strs)
        end
        if maxsize < minsize then
          bestpos = pos
          minsize = maxsize
          best_bucket = try_bucket
        end
      end
      len_pos_buckets[len] = {bestpos, best_bucket}
      worst_buck_size = math.max(worst_buck_size, minsize)
    end
  end
  return len_pos_buckets, maxlen, worst_buck_size
end

function M.switcher(put, tab, maxlen, worst_buck_size)
  local neworder = {}
  put "  switch (len) {\n"
  local bucky = worst_buck_size > 1
  for len = 1,maxlen do
    local vals = tab[len]
    if vals then
      put("    case "..len..": ")
      local pos, posbuck = unpack(vals)
      local keys = vim.tbl_keys(posbuck)
      if #keys > 1 then
        table.sort(keys)
        put("switch (str["..(pos-1).."]) {\n")
        for _,c in ipairs(keys) do
          local buck = posbuck[c]
          local startidx = #neworder
          vim.list_extend(neworder, buck)
          local endidx = #neworder
          put("      case '"..c.."': ")
          put("low = "..startidx.."; ")
          if bucky then put("high = "..endidx.."; ") end
          put "break;\n"
        end
        put "      default: break;\n"
        put "    }\n    "
      else
          local startidx = #neworder
          table.insert(neworder, posbuck[keys[1]][1])
          local endidx = #neworder
          put("low = "..startidx.."; ")
          if bucky then put("high = "..endidx.."; ") end
      end
      put "break;\n"
    end
  end
  put "    default: break;\n"
  put "  }\n"
  return neworder
end

function M.hashy_hash(name, strings, access)
  local stats = {}
  local put = function(str) table.insert(stats, str) end
  local len_pos_buckets, maxlen, worst_buck_size = M.build_pos_hash(strings)
  put("int "..name.."_hash(const char *str, size_t len)\n{\n")
  if worst_buck_size > 1 then
    put("  int low = 0, high = 0;\n")
  else
    put("  int low = -1;\n")
  end
  local neworder = M.switcher(put, len_pos_buckets, maxlen, worst_buck_size)
  if worst_buck_size > 1 then
    put ([[
  for (int i = low; i < high; i++) {
    if (!memcmp(str, ]]..access("i")..[[, len)) {
      return i;
    }
  }
  return -1;
]])
  else
    put ([[
  if (low < 0 || memcmp(str, ]]..access("low")..[[, len)) {
    return -1;
  }
  return low;
]])
  end
  put "}\n\n"
  return neworder, table.concat(stats)
end

return M