diff options
Diffstat (limited to 'src/nvim')
| -rw-r--r-- | src/nvim/generators/gen_unicode_tables.lua | 50 | ||||
| -rw-r--r-- | src/nvim/mbyte.c | 53 |
2 files changed, 25 insertions, 78 deletions
diff --git a/src/nvim/generators/gen_unicode_tables.lua b/src/nvim/generators/gen_unicode_tables.lua index 305b64b7be..01eb34be88 100644 --- a/src/nvim/generators/gen_unicode_tables.lua +++ b/src/nvim/generators/gen_unicode_tables.lua @@ -28,7 +28,6 @@ local get_path = function(fname) end local unicodedata_fname = get_path('UnicodeData.txt') -local casefolding_fname = get_path('CaseFolding.txt') local eastasianwidth_fname = get_path('EastAsianWidth.txt') local emoji_fname = get_path('emoji-data.txt') @@ -77,10 +76,6 @@ local parse_data_to_props = function(ud_fp) return fp_lines_to_lists(ud_fp, 15, false) end -local parse_fold_props = function(cf_fp) - return fp_lines_to_lists(cf_fp, 4, true) -end - local parse_width_props = function(eaw_fp) return fp_lines_to_lists(eaw_fp, 2, true) end @@ -97,45 +92,6 @@ local make_range = function(start, end_, step, add) end end -local build_convert_table = function(ut_fp, props, cond_func, nl_index, table_name) - ut_fp:write('static const convertStruct ' .. table_name .. '[] = {\n') - local start = -1 - local end_ = -1 - local step = 0 - local add = -1 - for _, p in ipairs(props) do - if cond_func(p) then - local n = tonumber(p[1], 16) - local nl = tonumber(p[nl_index], 16) - if start >= 0 and add == (nl - n) and (step == 0 or n - end_ == step) then - -- Continue with the same range. - step = n - end_ - end_ = n - else - if start >= 0 then - -- Produce previous range. - ut_fp:write(make_range(start, end_, step, add)) - end - start = n - end_ = n - step = 0 - add = nl - n - end - end - end - if start >= 0 then - ut_fp:write(make_range(start, end_, step, add)) - end - ut_fp:write('};\n') -end - -local build_fold_table = function(ut_fp, foldprops) - local cond_func = function(p) - return (p[2] == 'C' or p[2] == 'S') - end - return build_convert_table(ut_fp, foldprops, cond_func, 3, 'foldCase') -end - local build_combining_table = function(ut_fp, dataprops) ut_fp:write('static const struct interval combining[] = {\n') local start = -1 @@ -291,12 +247,6 @@ local ut_fp = io.open(utf_tables_fname, 'w') build_combining_table(ut_fp, dataprops) -local cf_fp = io.open(casefolding_fname, 'r') -local foldprops = parse_fold_props(cf_fp) -cf_fp:close() - -build_fold_table(ut_fp, foldprops) - local eaw_fp = io.open(eastasianwidth_fname, 'r') local widthprops = parse_width_props(eaw_fp) eaw_fp:close() diff --git a/src/nvim/mbyte.c b/src/nvim/mbyte.c index c6cefb8a91..0c1b537f3a 100644 --- a/src/nvim/mbyte.c +++ b/src/nvim/mbyte.c @@ -1284,41 +1284,38 @@ bool utf_ambiguous_width(int c) || intable(emoji_all, ARRAY_SIZE(emoji_all), c)); } -// Generic conversion function for case operations. -// Return the converted equivalent of "a", which is a UCS-4 character. Use -// the given conversion "table". Uses binary search on "table". -static int utf_convert(int a, const convertStruct *const table, size_t n_items) -{ - // indices into table - size_t start = 0; - size_t end = n_items; - while (start < end) { - // need to search further - size_t mid = (end + start) / 2; - if (table[mid].rangeEnd < a) { - start = mid + 1; - } else { - end = mid; - } - } - if (start < n_items - && table[start].rangeStart <= a - && a <= table[start].rangeEnd - && (a - table[start].rangeStart) % table[start].step == 0) { - return a + table[start].offset; - } - return a; -} - // Return the folded-case equivalent of "a", which is a UCS-4 character. Uses -// simple case folding. +// full case folding. int utf_fold(int a) { if (a < 0x80) { // be fast for ASCII return a >= 0x41 && a <= 0x5a ? a + 32 : a; } - return utf_convert(a, foldCase, ARRAY_SIZE(foldCase)); + + // TODO(dundargoc): utf8proc only does full case folding, which breaks some tests. This is a + // temporary workaround to circumvent failing tests. + // + // (0xdf) ß == ss in full casefolding. Using this however breaks the vim spell tests and the error + // E763 is thrown. This is due to the test spells relying on the vim spell files. + // + // (0x130) İ == i̇ in full casefolding. + if (a == 0xdf || a == 0x130) { + return a; + } + + utf8proc_uint8_t input_str[16] = { 0 }; + utf8proc_encode_char(a, input_str); + + utf8proc_uint8_t *fold_str_utf; + utf8proc_map((utf8proc_uint8_t *)input_str, 0, &fold_str_utf, + UTF8PROC_NULLTERM | UTF8PROC_CASEFOLD); + + int fold_codepoint_utf = utf_ptr2char((char *)fold_str_utf); + + xfree(fold_str_utf); + + return fold_codepoint_utf; } // Vim's own character class functions. These exist because many library |