diff options
| author | dundargoc <gocdundar@gmail.com> | 2024-06-18 14:01:20 +0200 |
|---|---|---|
| committer | dundargoc <33953936+dundargoc@users.noreply.github.com> | 2024-08-07 15:31:18 +0200 |
| commit | 328ea02eb7dec32286ae6c691ecef71d988c905b (patch) | |
| tree | 7ed364062b329648eff486da8d34cecc95a0f0b8 /src/nvim/generators | |
| parent | 11a6f3c9301b3deb71f7e5886fce3718420355be (diff) | |
| download | rneovim-328ea02eb7dec32286ae6c691ecef71d988c905b.tar.gz rneovim-328ea02eb7dec32286ae6c691ecef71d988c905b.tar.bz2 rneovim-328ea02eb7dec32286ae6c691ecef71d988c905b.zip | |
refactor!: use utf8proc full casefolding
According to `CaseFolding-15.1.0.txt`, full casefolding should be
preferred over simple casefolding as it's considered to be more correct.
Since utf8proc already provides full casefolding it makes sense to
switch to it. This will also remove a lot of unnecessary build code.
Temporary exceptions are made for two sets of characters:
- `ß` will still be considered `ß` (instead of `ss`) as using full
casefolding requires interfering with upstream spell files in some
form.
- `İ` will still be considered `İ` (instead of `i̇`) as using full
casefolding requires making a value judgement on the "correct"
behavior. There are two equally valid case-insensitive comparisons for
this character according to Unicode. It is essentially up to the
implementor to decide which conversion is correct. For this reason it
might make sense to allow users to decide which conversion should be
done as an added option to `casemap` in a future PR.
Diffstat (limited to 'src/nvim/generators')
| -rw-r--r-- | src/nvim/generators/gen_unicode_tables.lua | 50 |
1 files changed, 0 insertions, 50 deletions
diff --git a/src/nvim/generators/gen_unicode_tables.lua b/src/nvim/generators/gen_unicode_tables.lua index 305b64b7be..01eb34be88 100644 --- a/src/nvim/generators/gen_unicode_tables.lua +++ b/src/nvim/generators/gen_unicode_tables.lua @@ -28,7 +28,6 @@ local get_path = function(fname) end local unicodedata_fname = get_path('UnicodeData.txt') -local casefolding_fname = get_path('CaseFolding.txt') local eastasianwidth_fname = get_path('EastAsianWidth.txt') local emoji_fname = get_path('emoji-data.txt') @@ -77,10 +76,6 @@ local parse_data_to_props = function(ud_fp) return fp_lines_to_lists(ud_fp, 15, false) end -local parse_fold_props = function(cf_fp) - return fp_lines_to_lists(cf_fp, 4, true) -end - local parse_width_props = function(eaw_fp) return fp_lines_to_lists(eaw_fp, 2, true) end @@ -97,45 +92,6 @@ local make_range = function(start, end_, step, add) end end -local build_convert_table = function(ut_fp, props, cond_func, nl_index, table_name) - ut_fp:write('static const convertStruct ' .. table_name .. '[] = {\n') - local start = -1 - local end_ = -1 - local step = 0 - local add = -1 - for _, p in ipairs(props) do - if cond_func(p) then - local n = tonumber(p[1], 16) - local nl = tonumber(p[nl_index], 16) - if start >= 0 and add == (nl - n) and (step == 0 or n - end_ == step) then - -- Continue with the same range. - step = n - end_ - end_ = n - else - if start >= 0 then - -- Produce previous range. 
- ut_fp:write(make_range(start, end_, step, add)) - end - start = n - end_ = n - step = 0 - add = nl - n - end - end - end - if start >= 0 then - ut_fp:write(make_range(start, end_, step, add)) - end - ut_fp:write('};\n') -end - -local build_fold_table = function(ut_fp, foldprops) - local cond_func = function(p) - return (p[2] == 'C' or p[2] == 'S') - end - return build_convert_table(ut_fp, foldprops, cond_func, 3, 'foldCase') -end - local build_combining_table = function(ut_fp, dataprops) ut_fp:write('static const struct interval combining[] = {\n') local start = -1 @@ -291,12 +247,6 @@ local ut_fp = io.open(utf_tables_fname, 'w') build_combining_table(ut_fp, dataprops) -local cf_fp = io.open(casefolding_fname, 'r') -local foldprops = parse_fold_props(cf_fp) -cf_fp:close() - -build_fold_table(ut_fp, foldprops) - local eaw_fp = io.open(eastasianwidth_fname, 'r') local widthprops = parse_width_props(eaw_fp) eaw_fp:close() |