aboutsummaryrefslogtreecommitdiff
path: root/src/gen/hashy.lua
diff options
context:
space:
mode:
Diffstat (limited to 'src/gen/hashy.lua')
-rw-r--r--src/gen/hashy.lua145
1 files changed, 145 insertions, 0 deletions
diff --git a/src/gen/hashy.lua b/src/gen/hashy.lua
new file mode 100644
index 0000000000..74b7655324
--- /dev/null
+++ b/src/gen/hashy.lua
@@ -0,0 +1,145 @@
+-- HASHY McHASHFACE
+
+local M = {}
+_G.d = M
+
+local function setdefault(table, key)
+ local val = table[key]
+ if val == nil then
+ val = {}
+ table[key] = val
+ end
+ return val
+end
+
+function M.build_pos_hash(strings)
+ local len_buckets = {}
+ local maxlen = 0
+ for _, s in ipairs(strings) do
+ table.insert(setdefault(len_buckets, #s), s)
+ if #s > maxlen then
+ maxlen = #s
+ end
+ end
+
+ local len_pos_buckets = {}
+ local worst_buck_size = 0
+
+ for len = 1, maxlen do
+ local strs = len_buckets[len]
+ if strs then
+ -- the best position so far generates `best_bucket`
+ -- with `minsize` worst case collisions
+ local bestpos, minsize, best_bucket = nil, #strs * 2, nil
+ for pos = 1, len do
+ local try_bucket = {}
+ for _, str in ipairs(strs) do
+ local poschar = string.sub(str, pos, pos)
+ table.insert(setdefault(try_bucket, poschar), str)
+ end
+ local maxsize = 1
+ for _, pos_strs in pairs(try_bucket) do
+ maxsize = math.max(maxsize, #pos_strs)
+ end
+ if maxsize < minsize then
+ bestpos = pos
+ minsize = maxsize
+ best_bucket = try_bucket
+ end
+ end
+ len_pos_buckets[len] = { bestpos, best_bucket }
+ worst_buck_size = math.max(worst_buck_size, minsize)
+ end
+ end
+ return len_pos_buckets, maxlen, worst_buck_size
+end
+
+function M.switcher(put, tab, maxlen, worst_buck_size)
+ local neworder = {} --- @type string[]
+ put ' switch (len) {\n'
+ local bucky = worst_buck_size > 1
+ for len = 1, maxlen do
+ local vals = tab[len]
+ if vals then
+ put(' case ' .. len .. ': ')
+ local pos, posbuck = unpack(vals)
+ local keys = vim.tbl_keys(posbuck)
+ if #keys > 1 then
+ table.sort(keys)
+ put('switch (str[' .. (pos - 1) .. ']) {\n')
+ for _, c in ipairs(keys) do
+ local buck = posbuck[c]
+ local startidx = #neworder
+ vim.list_extend(neworder, buck)
+ local endidx = #neworder
+ put(" case '" .. c .. "': ")
+ if len == 1 then
+ put('return ' .. startidx .. ';\n')
+ else
+ put('low = ' .. startidx .. '; ')
+ if bucky then
+ put('high = ' .. endidx .. '; ')
+ end
+ put 'break;\n'
+ end
+ end
+ put ' default: break;\n'
+ put ' }\n '
+ else
+ local startidx = #neworder
+ table.insert(neworder, posbuck[keys[1]][1])
+ local endidx = #neworder
+ put('low = ' .. startidx .. '; ')
+ if bucky then
+ put('high = ' .. endidx .. '; ')
+ end
+ end
+ put 'break;\n'
+ end
+ end
+ put ' default: break;\n'
+ put ' }\n'
+ return neworder
+end
+
+function M.hashy_hash(name, strings, access)
+ local stats = {}
+ local put = function(str)
+ table.insert(stats, str)
+ end
+ local len_pos_buckets, maxlen, worst_buck_size = M.build_pos_hash(strings)
+ put('int ' .. name .. '_hash(const char *str, size_t len)\n{\n')
+ if maxlen == 1 then
+ put('\n') -- nothing
+ elseif worst_buck_size > 1 then
+ put(' int low = 0, high = 0;\n')
+ else
+ put(' int low = -1;\n')
+ end
+ local neworder = M.switcher(put, len_pos_buckets, maxlen, worst_buck_size)
+ if maxlen == 1 then
+ put([[
+ return -1;
+]])
+ elseif worst_buck_size > 1 then
+ put([[
+ for (int i = low; i < high; i++) {
+ if (!memcmp(str, ]] .. access('i') .. [[, len)) {
+ return i;
+ }
+ }
+ return -1;
+]])
+ else
+ put([[
+ if (low < 0 || memcmp(str, ]] .. access('low') .. [[, len)) {
+ return -1;
+ }
+ return low;
+]])
+ end
+ put '}\n\n'
+ return neworder, table.concat(stats)
+end
+
+return M