diff options
Diffstat (limited to 'src/gen/hashy.lua')
-rw-r--r-- | src/gen/hashy.lua | 145 |
1 files changed, 145 insertions, 0 deletions
diff --git a/src/gen/hashy.lua b/src/gen/hashy.lua new file mode 100644 index 0000000000..74b7655324 --- /dev/null +++ b/src/gen/hashy.lua @@ -0,0 +1,145 @@ +-- HASHY McHASHFACE + +local M = {} +_G.d = M + +local function setdefault(table, key) + local val = table[key] + if val == nil then + val = {} + table[key] = val + end + return val +end + +function M.build_pos_hash(strings) + local len_buckets = {} + local maxlen = 0 + for _, s in ipairs(strings) do + table.insert(setdefault(len_buckets, #s), s) + if #s > maxlen then + maxlen = #s + end + end + + local len_pos_buckets = {} + local worst_buck_size = 0 + + for len = 1, maxlen do + local strs = len_buckets[len] + if strs then + -- the best position so far generates `best_bucket` + -- with `minsize` worst case collisions + local bestpos, minsize, best_bucket = nil, #strs * 2, nil + for pos = 1, len do + local try_bucket = {} + for _, str in ipairs(strs) do + local poschar = string.sub(str, pos, pos) + table.insert(setdefault(try_bucket, poschar), str) + end + local maxsize = 1 + for _, pos_strs in pairs(try_bucket) do + maxsize = math.max(maxsize, #pos_strs) + end + if maxsize < minsize then + bestpos = pos + minsize = maxsize + best_bucket = try_bucket + end + end + len_pos_buckets[len] = { bestpos, best_bucket } + worst_buck_size = math.max(worst_buck_size, minsize) + end + end + return len_pos_buckets, maxlen, worst_buck_size +end + +function M.switcher(put, tab, maxlen, worst_buck_size) + local neworder = {} --- @type string[] + put ' switch (len) {\n' + local bucky = worst_buck_size > 1 + for len = 1, maxlen do + local vals = tab[len] + if vals then + put(' case ' .. len .. ': ') + local pos, posbuck = unpack(vals) + local keys = vim.tbl_keys(posbuck) + if #keys > 1 then + table.sort(keys) + put('switch (str[' .. (pos - 1) .. ']) {\n') + for _, c in ipairs(keys) do + local buck = posbuck[c] + local startidx = #neworder + vim.list_extend(neworder, buck) + local endidx = #neworder + put(" case '" .. c .. "': ") + if len == 1 then + put('return ' .. startidx .. ';\n') + else + put('low = ' .. startidx .. '; ') + if bucky then + put('high = ' .. endidx .. '; ') + end + put 'break;\n' + end + end + put ' default: break;\n' + put ' }\n ' + else + local startidx = #neworder + table.insert(neworder, posbuck[keys[1]][1]) + local endidx = #neworder + put('low = ' .. startidx .. '; ') + if bucky then + put('high = ' .. endidx .. '; ') + end + end + put 'break;\n' + end + end + put ' default: break;\n' + put ' }\n' + return neworder +end + +function M.hashy_hash(name, strings, access) + local stats = {} + local put = function(str) + table.insert(stats, str) + end + local len_pos_buckets, maxlen, worst_buck_size = M.build_pos_hash(strings) + put('int ' .. name .. '_hash(const char *str, size_t len)\n{\n') + if maxlen == 1 then + put('\n') -- nothing + elseif worst_buck_size > 1 then + put(' int low = 0, high = 0;\n') + else + put(' int low = -1;\n') + end + local neworder = M.switcher(put, len_pos_buckets, maxlen, worst_buck_size) + if maxlen == 1 then + put([[ + return -1; +]]) + elseif worst_buck_size > 1 then + put([[ + for (int i = low; i < high; i++) { + if (!memcmp(str, ]] .. access('i') .. [[, len)) { + return i; + } + } + return -1; +]]) + else + put([[ + if (low < 0 || memcmp(str, ]] .. access('low') .. [[, len)) { + return -1; + } + return low; +]]) + end + put '}\n\n' + return neworder, table.concat(stats) +end + +return M |