diff options
author | Jan Edmund Lazo <jan.lazo@mail.utoronto.ca> | 2020-06-09 18:17:47 -0400 |
---|---|---|
committer | Jan Edmund Lazo <jan.lazo@mail.utoronto.ca> | 2020-06-18 18:01:42 -0400 |
commit | 33985a3a15fac1486cc11e8df272a0eeaa5fc77d (patch) | |
tree | 5f182fb2f99ee0d1e42052e2a314d10f6de607f1 | |
parent | b7cae2bca3028c170871f05670bbd6a7ad508c56 (diff) | |
download | rneovim-33985a3a15fac1486cc11e8df272a0eeaa5fc77d.tar.gz rneovim-33985a3a15fac1486cc11e8df272a0eeaa5fc77d.tar.bz2 rneovim-33985a3a15fac1486cc11e8df272a0eeaa5fc77d.zip |
vim-patch:8.2.0938: NFA regexp uses tolower() to compare ignore-case
Problem: NFA regexp uses tolower() to compare ignore-case. (Thayne McCombs)
Solution: Use utf_fold() when possible. (ref. neovim vim/vim#12456)
https://github.com/vim/vim/commit/59de417b904bbd204e313f015839317b577bd124
-rw-r--r-- | src/nvim/regexp_nfa.c | 17 | ||||
-rw-r--r-- | src/nvim/testdir/test_regexp_utf8.vim | 19 |
2 files changed, 28 insertions, 8 deletions
diff --git a/src/nvim/regexp_nfa.c b/src/nvim/regexp_nfa.c index 2ca5f42e51..387732fdee 100644 --- a/src/nvim/regexp_nfa.c +++ b/src/nvim/regexp_nfa.c @@ -4960,7 +4960,7 @@ static long find_match_text(colnr_T startcol, int regstart, char_u *match_text) int c2_len = PTR2LEN(s2); int c2 = PTR2CHAR(s2); - if ((c1 != c2 && (!rex.reg_ic || mb_tolower(c1) != mb_tolower(c2))) + if ((c1 != c2 && (!rex.reg_ic || utf_fold(c1) != utf_fold(c2))) || c1_len != c2_len) { match = false; break; @@ -5682,11 +5682,11 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, break; } if (rex.reg_ic) { - int curc_low = mb_tolower(curc); + int curc_low = utf_fold(curc); int done = false; for (; c1 <= c2; c1++) { - if (mb_tolower(c1) == curc_low) { + if (utf_fold(c1) == curc_low) { result = result_if_matched; done = TRUE; break; @@ -5698,8 +5698,8 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, } } else if (state->c < 0 ? check_char_class(state->c, curc) : (curc == state->c - || (rex.reg_ic && mb_tolower(curc) - == mb_tolower(state->c)))) { + || (rex.reg_ic + && utf_fold(curc) == utf_fold(state->c)))) { result = result_if_matched; break; } @@ -6106,7 +6106,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, result = (c == curc); if (!result && rex.reg_ic) { - result = mb_tolower(c) == mb_tolower(curc); + result = utf_fold(c) == utf_fold(curc); } // If rex.reg_icombine is not set only skip over the character @@ -6260,8 +6260,9 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, // Checking if the required start character matches is // cheaper than adding a state that won't match. 
c = PTR2CHAR(reginput + clen); - if (c != prog->regstart && (!rex.reg_ic || mb_tolower(c) - != mb_tolower(prog->regstart))) { + if (c != prog->regstart + && (!rex.reg_ic + || utf_fold(c) != utf_fold(prog->regstart))) { #ifdef REGEXP_DEBUG fprintf(log_fd, " Skipping start state, regstart does not match\n"); diff --git a/src/nvim/testdir/test_regexp_utf8.vim b/src/nvim/testdir/test_regexp_utf8.vim index ecd0e8d56b..f48458566b 100644 --- a/src/nvim/testdir/test_regexp_utf8.vim +++ b/src/nvim/testdir/test_regexp_utf8.vim @@ -332,4 +332,23 @@ func Test_ambiwidth() set regexpengine& ambiwidth& endfunc +func Run_regexp_ignore_case() + call assert_equal('iIİ', substitute('iIİ', '\([iIİ]\)', '\1', 'g')) + + call assert_equal('iIx', substitute('iIİ', '\c\([İ]\)', 'x', 'g')) + call assert_equal('xxİ', substitute('iIİ', '\(i\c\)', 'x', 'g')) + call assert_equal('iIx', substitute('iIİ', '\(İ\c\)', 'x', 'g')) + call assert_equal('iIx', substitute('iIİ', '\c\(\%u0130\)', 'x', 'g')) + call assert_equal('iIx', substitute('iIİ', '\c\([\u0130]\)', 'x', 'g')) + call assert_equal('iIx', substitute('iIİ', '\c\([\u012f-\u0131]\)', 'x', 'g')) +endfunc + +func Test_regexp_ignore_case() + set regexpengine=1 + call Run_regexp_ignore_case() + set regexpengine=2 + call Run_regexp_ignore_case() + set regexpengine& +endfunc + " vim: shiftwidth=2 sts=2 expandtab |