vim-patch:8.2.0938: NFA regexp uses tolower() to compare ignore-case

Problem: NFA regexp uses tolower() to compare ignore-case. (Thayne McCombs)
Solution: Use utf_fold() when possible. (ref. neovim vim/vim#12456)
https://github.com/vim/vim/commit/59de417b904bbd204e313f015839317b577bd124
author: Jan Edmund Lazo <jan.lazo@mail.utoronto.ca> 2020-06-09 18:17:47 -0400
committer: Jan Edmund Lazo <jan.lazo@mail.utoronto.ca> 2020-06-18 18:01:42 -0400
commit: 33985a3a15fac1486cc11e8df272a0eeaa5fc77d (patch)
tree: 5f182fb2f99ee0d1e42052e2a314d10f6de607f1
parent: b7cae2bca3028c170871f05670bbd6a7ad508c56 (diff)
download: rneovim-33985a3a15fac1486cc11e8df272a0eeaa5fc77d.tar.gz
rneovim-33985a3a15fac1486cc11e8df272a0eeaa5fc77d.tar.bz2
rneovim-33985a3a15fac1486cc11e8df272a0eeaa5fc77d.zip
2 files changed, 28 insertions, 8 deletions
diff --git a/src/nvim/regexp_nfa.c b/src/nvim/regexp_nfa.c
index 2ca5f42e51..387732fdee 100644
--- a/src/nvim/regexp_nfa.c
+++ b/src/nvim/regexp_nfa.c
@@ -4960,7 +4960,7 @@ static long find_match_text(colnr_T startcol, int regstart, char_u *match_text)
       int c2_len = PTR2LEN(s2);
       int c2 = PTR2CHAR(s2);
 
-      if ((c1 != c2 && (!rex.reg_ic || mb_tolower(c1) != mb_tolower(c2)))
+      if ((c1 != c2 && (!rex.reg_ic || utf_fold(c1) != utf_fold(c2)))
           || c1_len != c2_len) {
         match = false;
         break;
@@ -5682,11 +5682,11 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
               break;
             }
             if (rex.reg_ic) {
-              int curc_low = mb_tolower(curc);
+              int curc_low = utf_fold(curc);
               int done = false;
 
               for (; c1 <= c2; c1++) {
-                if (mb_tolower(c1) == curc_low) {
+                if (utf_fold(c1) == curc_low) {
                   result = result_if_matched;
                   done = TRUE;
                   break;
@@ -5698,8 +5698,8 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
             }
           } else if (state->c < 0 ? check_char_class(state->c, curc)
                      : (curc == state->c
-                        || (rex.reg_ic && mb_tolower(curc)
-                            == mb_tolower(state->c)))) {
+                        || (rex.reg_ic
+                            && utf_fold(curc) == utf_fold(state->c)))) {
             result = result_if_matched;
             break;
           }
@@ -6106,7 +6106,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
         result = (c == curc);
 
         if (!result && rex.reg_ic) {
-          result = mb_tolower(c) == mb_tolower(curc);
+          result = utf_fold(c) == utf_fold(curc);
         }
 
         // If rex.reg_icombine is not set only skip over the character
@@ -6260,8 +6260,9 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
             // Checking if the required start character matches is
             // cheaper than adding a state that won't match.
             c = PTR2CHAR(reginput + clen);
-            if (c != prog->regstart && (!rex.reg_ic || mb_tolower(c)
-                                        != mb_tolower(prog->regstart))) {
+            if (c != prog->regstart
+                && (!rex.reg_ic
+                    || utf_fold(c) != utf_fold(prog->regstart))) {
 #ifdef REGEXP_DEBUG
               fprintf(log_fd,
                   "  Skipping start state, regstart does not match\n");
diff --git a/src/nvim/testdir/test_regexp_utf8.vim b/src/nvim/testdir/test_regexp_utf8.vim
index ecd0e8d56b..f48458566b 100644
--- a/src/nvim/testdir/test_regexp_utf8.vim
+++ b/src/nvim/testdir/test_regexp_utf8.vim
@@ -332,4 +332,23 @@ func Test_ambiwidth()
   set regexpengine& ambiwidth&
 endfunc
 
+func Run_regexp_ignore_case()
+  call assert_equal('iIİ', substitute('iIİ', '\([iIİ]\)', '\1', 'g'))
+
+  call assert_equal('iIx', substitute('iIİ', '\c\([İ]\)', 'x', 'g'))
+  call assert_equal('xxİ', substitute('iIİ', '\(i\c\)', 'x', 'g'))
+  call assert_equal('iIx', substitute('iIİ', '\(İ\c\)', 'x', 'g'))
+  call assert_equal('iIx', substitute('iIİ', '\c\(\%u0130\)', 'x', 'g'))
+  call assert_equal('iIx', substitute('iIİ', '\c\([\u0130]\)', 'x', 'g'))
+  call assert_equal('iIx', substitute('iIİ', '\c\([\u012f-\u0131]\)', 'x', 'g'))
+endfunc
+
+func Test_regexp_ignore_case()
+  set regexpengine=1
+  call Run_regexp_ignore_case()
+  set regexpengine=2
+  call Run_regexp_ignore_case()
+  set regexpengine&
+endfunc
+
 " vim: shiftwidth=2 sts=2 expandtab
author	Jan Edmund Lazo <jan.lazo@mail.utoronto.ca>	2020-06-09 18:17:47 -0400
committer	Jan Edmund Lazo <jan.lazo@mail.utoronto.ca>	2020-06-18 18:01:42 -0400
commit	33985a3a15fac1486cc11e8df272a0eeaa5fc77d (patch)
tree	5f182fb2f99ee0d1e42052e2a314d10f6de607f1
parent	b7cae2bca3028c170871f05670bbd6a7ad508c56 (diff)
download	rneovim-33985a3a15fac1486cc11e8df272a0eeaa5fc77d.tar.gz rneovim-33985a3a15fac1486cc11e8df272a0eeaa5fc77d.tar.bz2 rneovim-33985a3a15fac1486cc11e8df272a0eeaa5fc77d.zip