1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
|
-- HASHY McHASHFACE
local M = {}
_G.d = M
local function setdefault(table, key)
local val = table[key]
if val == nil then
val = {}
table[key] = val
end
return val
end
function M.build_pos_hash(strings)
local len_buckets = {}
local maxlen = 0
for _, s in ipairs(strings) do
table.insert(setdefault(len_buckets, #s), s)
if #s > maxlen then
maxlen = #s
end
end
local len_pos_buckets = {}
local worst_buck_size = 0
for len = 1, maxlen do
local strs = len_buckets[len]
if strs then
-- the best position so far generates `best_bucket`
-- with `minsize` worst case collisions
local bestpos, minsize, best_bucket = nil, #strs * 2, nil
for pos = 1, len do
local try_bucket = {}
for _, str in ipairs(strs) do
local poschar = string.sub(str, pos, pos)
table.insert(setdefault(try_bucket, poschar), str)
end
local maxsize = 1
for _, pos_strs in pairs(try_bucket) do
maxsize = math.max(maxsize, #pos_strs)
end
if maxsize < minsize then
bestpos = pos
minsize = maxsize
best_bucket = try_bucket
end
end
len_pos_buckets[len] = { bestpos, best_bucket }
worst_buck_size = math.max(worst_buck_size, minsize)
end
end
return len_pos_buckets, maxlen, worst_buck_size
end
function M.switcher(put, tab, maxlen, worst_buck_size)
local neworder = {} --- @type string[]
put ' switch (len) {\n'
local bucky = worst_buck_size > 1
for len = 1, maxlen do
local vals = tab[len]
if vals then
put(' case ' .. len .. ': ')
local pos, posbuck = unpack(vals)
local keys = vim.tbl_keys(posbuck)
if #keys > 1 then
table.sort(keys)
put('switch (str[' .. (pos - 1) .. ']) {\n')
for _, c in ipairs(keys) do
local buck = posbuck[c]
local startidx = #neworder
vim.list_extend(neworder, buck)
local endidx = #neworder
put(" case '" .. c .. "': ")
if len == 1 then
put('return ' .. startidx .. ';\n')
else
put('low = ' .. startidx .. '; ')
if bucky then
put('high = ' .. endidx .. '; ')
end
put 'break;\n'
end
end
put ' default: break;\n'
put ' }\n '
else
local startidx = #neworder
table.insert(neworder, posbuck[keys[1]][1])
local endidx = #neworder
put('low = ' .. startidx .. '; ')
if bucky then
put('high = ' .. endidx .. '; ')
end
end
put 'break;\n'
end
end
put ' default: break;\n'
put ' }\n'
return neworder
end
function M.hashy_hash(name, strings, access)
local stats = {}
local put = function(str)
table.insert(stats, str)
end
local len_pos_buckets, maxlen, worst_buck_size = M.build_pos_hash(strings)
put('int ' .. name .. '_hash(const char *str, size_t len)\n{\n')
if maxlen == 1 then
put('\n') -- nothing
elseif worst_buck_size > 1 then
put(' int low = 0, high = 0;\n')
else
put(' int low = -1;\n')
end
local neworder = M.switcher(put, len_pos_buckets, maxlen, worst_buck_size)
if maxlen == 1 then
put([[
return -1;
]])
elseif worst_buck_size > 1 then
put([[
for (int i = low; i < high; i++) {
if (!memcmp(str, ]] .. access('i') .. [[, len)) {
return i;
}
}
return -1;
]])
else
put([[
if (low < 0 || memcmp(str, ]] .. access('low') .. [[, len)) {
return -1;
}
return low;
]])
end
put '}\n\n'
return neworder, table.concat(stats)
end
return M
|