aboutsummaryrefslogtreecommitdiff
path: root/src/nvim
diff options
context:
space:
mode:
Diffstat (limited to 'src/nvim')
-rw-r--r-- src/nvim/generators/gen_unicode_tables.lua 50
-rw-r--r-- src/nvim/mbyte.c 53
2 files changed, 25 insertions, 78 deletions
diff --git a/src/nvim/generators/gen_unicode_tables.lua b/src/nvim/generators/gen_unicode_tables.lua
index 305b64b7be..01eb34be88 100644
--- a/src/nvim/generators/gen_unicode_tables.lua
+++ b/src/nvim/generators/gen_unicode_tables.lua
@@ -28,7 +28,6 @@ local get_path = function(fname)
end
local unicodedata_fname = get_path('UnicodeData.txt')
-local casefolding_fname = get_path('CaseFolding.txt')
local eastasianwidth_fname = get_path('EastAsianWidth.txt')
local emoji_fname = get_path('emoji-data.txt')
@@ -77,10 +76,6 @@ local parse_data_to_props = function(ud_fp)
return fp_lines_to_lists(ud_fp, 15, false)
end
-local parse_fold_props = function(cf_fp)
- return fp_lines_to_lists(cf_fp, 4, true)
-end
-
local parse_width_props = function(eaw_fp)
return fp_lines_to_lists(eaw_fp, 2, true)
end
@@ -97,45 +92,6 @@ local make_range = function(start, end_, step, add)
end
end
-local build_convert_table = function(ut_fp, props, cond_func, nl_index, table_name)
- ut_fp:write('static const convertStruct ' .. table_name .. '[] = {\n')
- local start = -1
- local end_ = -1
- local step = 0
- local add = -1
- for _, p in ipairs(props) do
- if cond_func(p) then
- local n = tonumber(p[1], 16)
- local nl = tonumber(p[nl_index], 16)
- if start >= 0 and add == (nl - n) and (step == 0 or n - end_ == step) then
- -- Continue with the same range.
- step = n - end_
- end_ = n
- else
- if start >= 0 then
- -- Produce previous range.
- ut_fp:write(make_range(start, end_, step, add))
- end
- start = n
- end_ = n
- step = 0
- add = nl - n
- end
- end
- end
- if start >= 0 then
- ut_fp:write(make_range(start, end_, step, add))
- end
- ut_fp:write('};\n')
-end
-
-local build_fold_table = function(ut_fp, foldprops)
- local cond_func = function(p)
- return (p[2] == 'C' or p[2] == 'S')
- end
- return build_convert_table(ut_fp, foldprops, cond_func, 3, 'foldCase')
-end
-
local build_combining_table = function(ut_fp, dataprops)
ut_fp:write('static const struct interval combining[] = {\n')
local start = -1
@@ -291,12 +247,6 @@ local ut_fp = io.open(utf_tables_fname, 'w')
build_combining_table(ut_fp, dataprops)
-local cf_fp = io.open(casefolding_fname, 'r')
-local foldprops = parse_fold_props(cf_fp)
-cf_fp:close()
-
-build_fold_table(ut_fp, foldprops)
-
local eaw_fp = io.open(eastasianwidth_fname, 'r')
local widthprops = parse_width_props(eaw_fp)
eaw_fp:close()
diff --git a/src/nvim/mbyte.c b/src/nvim/mbyte.c
index c6cefb8a91..0c1b537f3a 100644
--- a/src/nvim/mbyte.c
+++ b/src/nvim/mbyte.c
@@ -1284,41 +1284,38 @@ bool utf_ambiguous_width(int c)
|| intable(emoji_all, ARRAY_SIZE(emoji_all), c));
}
-// Generic conversion function for case operations.
-// Return the converted equivalent of "a", which is a UCS-4 character. Use
-// the given conversion "table". Uses binary search on "table".
-static int utf_convert(int a, const convertStruct *const table, size_t n_items)
-{
- // indices into table
- size_t start = 0;
- size_t end = n_items;
- while (start < end) {
- // need to search further
- size_t mid = (end + start) / 2;
- if (table[mid].rangeEnd < a) {
- start = mid + 1;
- } else {
- end = mid;
- }
- }
- if (start < n_items
- && table[start].rangeStart <= a
- && a <= table[start].rangeEnd
- && (a - table[start].rangeStart) % table[start].step == 0) {
- return a + table[start].offset;
- }
- return a;
-}
-
// Return the folded-case equivalent of "a", which is a UCS-4 character. Uses
-// simple case folding.
+// full case folding, except that 0xdf and 0x130 are returned unchanged (see below).
int utf_fold(int a)
{
if (a < 0x80) {
// be fast for ASCII
return a >= 0x41 && a <= 0x5a ? a + 32 : a;
}
- return utf_convert(a, foldCase, ARRAY_SIZE(foldCase));
+
+ // TODO(dundargoc): utf8proc only does full case folding, which breaks some tests. This is a
+ // temporary workaround for those failing tests.
+ //
+ // (0xdf) ß == ss in full case folding. Using this, however, breaks the Vim spell tests with
+ // error E763, because the spell tests rely on the Vim spell files.
+ //
+ // (0x130) İ == i̇ in full case folding.
+ if (a == 0xdf || a == 0x130) {
+ return a;
+ }
+
+ utf8proc_uint8_t input_str[16] = { 0 };
+ utf8proc_encode_char(a, input_str);
+
+ utf8proc_uint8_t *fold_str_utf;
+ utf8proc_map((utf8proc_uint8_t *)input_str, 0, &fold_str_utf,
+ UTF8PROC_NULLTERM | UTF8PROC_CASEFOLD);
+
+ int fold_codepoint_utf = utf_ptr2char((char *)fold_str_utf);
+
+ xfree(fold_str_utf);
+
+ return fold_codepoint_utf;
}
// Vim's own character class functions. These exist because many library