vim-patch:9.1.0297: Patch 9.1.0296 causes too many issues (#28263)

Problem:  Patch 9.1.0296 causes too many issues (Tony Mechelynck, chdiza, CI).
Solution: Back out the change for now.

Revert "patch 9.1.0296: regexp: engines do not handle case-folding well"

This reverts commit 7a27c108e0509f3255ebdcb6558e896c223e4d23; it causes issues with syntax highlighting and breaks the FreeBSD and MacOS CI. It needs more work.

fixes: vim/vim#14487

https://github.com/vim/vim/commit/c97f4d61cde24030f2f7d2318e1b409a0ccc3e43

Co-authored-by: Christian Brabandt <cb@256bit.org>
author: zeertzjq <zeertzjq@outlook.com> 2024-04-11 07:40:16 +0800
committer: GitHub <noreply@github.com> 2024-04-11 07:40:16 +0800
commit: d0afb2dc4eb8e70942441b3c9a551dcccd6806cd (patch)
tree: 627bdbec7314d38bf17245b0ab0be4bd4e85c419
parent: adb70a351d64129c4e12febf7cbe528ef66c8eb6 (diff)
download: rneovim-d0afb2dc4eb8e70942441b3c9a551dcccd6806cd.tar.gz
rneovim-d0afb2dc4eb8e70942441b3c9a551dcccd6806cd.tar.bz2
rneovim-d0afb2dc4eb8e70942441b3c9a551dcccd6806cd.zip
4 files changed, 14 insertions, 52 deletions
diff --git a/runtime/doc/dev_vimpatch.txt b/runtime/doc/dev_vimpatch.txt
index 5cc7a70ba2..1f48324d46 100644
--- a/runtime/doc/dev_vimpatch.txt
+++ b/runtime/doc/dev_vimpatch.txt
@@ -204,7 +204,6 @@ information.
   mb_ptr2char                                          utf_ptr2char
   mb_head_off                                          utf_head_off
   mb_tail_off                                          utf_cp_bounds
-  mb_strnicmp2                                         utf_strnicmp
   mb_lefthalve                                        grid_lefthalve
   mb_fix_col                                           grid_fix_col
   utf_off2cells                                       grid_off2cells
diff --git a/src/nvim/mbyte.c b/src/nvim/mbyte.c
index 0a7bb78102..c7a56209e4 100644
--- a/src/nvim/mbyte.c
+++ b/src/nvim/mbyte.c
@@ -1387,7 +1387,7 @@ bool mb_isalpha(int a)
   return mb_islower(a) || mb_isupper(a);
 }
 
-int utf_strnicmp(const char *s1, const char *s2, size_t n1, size_t n2)
+static int utf_strnicmp(const char *s1, const char *s2, size_t n1, size_t n2)
 {
   int c1, c2;
   char buffer[6];
diff --git a/src/nvim/regexp.c b/src/nvim/regexp.c
index 32fb086ca6..a81990670a 100644
--- a/src/nvim/regexp.c
+++ b/src/nvim/regexp.c
@@ -1627,9 +1627,7 @@ static void mb_decompose(int c, int *c1, int *c2, int *c3)
 
 /// Compare two strings, ignore case if rex.reg_ic set.
 /// Return 0 if strings match, non-zero otherwise.
-/// Correct the length "*n" when composing characters are ignored
-/// or for utf8 when both utf codepoints are considered equal because of
-/// case-folding but have different length (e.g. 's' and 'ſ')
+/// Correct the length "*n" when composing characters are ignored.
 static int cstrncmp(char *s1, char *s2, int *n)
 {
   int result;
@@ -1637,11 +1635,8 @@ static int cstrncmp(char *s1, char *s2, int *n)
   if (!rex.reg_ic) {
     result = strncmp(s1, s2, (size_t)(*n));
   } else {
-    int l2 = utfc_ptr2len(s2);
-    result = utf_strnicmp(s1, s2, (size_t)(*n), (size_t)l2);
-    if (result == 0 && l2 < *n) {
-      *n = l2;
-    }
+    assert(*n >= 0);
+    result = mb_strnicmp(s1, s2, (size_t)(*n));
   }
 
   // if it failed and it's utf8 and we want to combineignore:
@@ -6495,9 +6490,11 @@ static bool regmatch(uint8_t *scan, const proftime_T *tm, int *timed_out)
               }
             }
           } else {
-            if (cstrncmp((char *)opnd, (char *)rex.input, &len) != 0) {
-              status = RA_NOMATCH;
-              break;
+            for (i = 0; i < len; i++) {
+              if (opnd[i] != rex.input[i]) {
+                status = RA_NOMATCH;
+                break;
+              }
             }
           }
           rex.input += len;
@@ -13849,25 +13846,19 @@ static int skip_to_start(int c, colnr_T *colp)
 static int find_match_text(colnr_T *startcol, int regstart, uint8_t *match_text)
 {
   colnr_T col = *startcol;
-  const int regstart_len = utf_char2len(regstart);
+  const int regstart_len = utf_ptr2len((char *)rex.line + col);
 
   while (true) {
     bool match = true;
     uint8_t *s1 = match_text;
-    // skip regstart
-    uint8_t *s2 = rex.line + col + regstart_len;
-    if (regstart_len > 1
-        && utf_char2len(utf_ptr2char((char *)rex.line + col)) != regstart_len) {
-      // because of case-folding of the previously matched text, we may need
-      // to skip fewer bytes than utf_char2len(regstart)
-      s2 = rex.line + col + utf_char2len(utf_fold(regstart));
-    }
+    uint8_t *s2 = rex.line + col + regstart_len;  // skip regstart
     while (*s1) {
       int c1_len = utf_ptr2len((char *)s1);
       int c1 = utf_ptr2char((char *)s1);
       int c2_len = utf_ptr2len((char *)s2);
       int c2 = utf_ptr2char((char *)s2);
-      if (c1 != c2 && (!rex.reg_ic || utf_fold(c1) != utf_fold(c2))) {
+      if ((c1 != c2 && (!rex.reg_ic || utf_fold(c1) != utf_fold(c2)))
+          || c1_len != c2_len) {
         match = false;
         break;
       }
@@ -15531,7 +15522,7 @@ static int nfa_regexec_both(uint8_t *line, colnr_T startcol, proftime_T *tm, int
 
     // If match_text is set it contains the full text that must match.
     // Nothing else to try. Doesn't handle combining chars well.
-    if (prog->match_text != NULL && *prog->match_text != NUL && !rex.reg_icombine) {
+    if (prog->match_text != NULL && !rex.reg_icombine) {
       retval = find_match_text(&col, prog->regstart, prog->match_text);
       if (REG_MULTI) {
         rex.reg_mmatch->rmm_matchcol = col;
diff --git a/test/old/testdir/test_regexp_utf8.vim b/test/old/testdir/test_regexp_utf8.vim
index b3f5e346a0..97f48a0c09 100644
--- a/test/old/testdir/test_regexp_utf8.vim
+++ b/test/old/testdir/test_regexp_utf8.vim
@@ -611,32 +611,4 @@ func Test_combining_chars_in_collection()
   bw!
 endfunc
 
-func Test_search_multibyte_match_ascii()
-  new
-  " Match single 'ſ' and 's'
-  call setline(1,  'das abc heraus abc ſich abc ſind')
-  for i in range(0, 2)
-    exe "set re="..i
-    let ic_match = matchbufline('%', '\c\%u17f', 1, '$')->mapnew({idx, val -> val.text})
-    let noic_match = matchbufline('%', '\C\%u17f', 1, '$')->mapnew({idx, val -> val.text})
-    call assert_equal(['s', 's', 'ſ','ſ'], ic_match, "Ignorecase Regex-engine: " .. &re)
-    call assert_equal(['ſ','ſ'], noic_match, "No-Ignorecase Regex-engine: " .. &re)
-  endfor
-  " Match several 'ſſ' and 'ss'
-  call setline(1,  'das abc herauss abc ſſich abc ſind')
-  for i in range(0, 2)
-    exe "set re="..i
-    let ic_match = matchbufline('%', '\c\%u17f\%u17f', 1, '$')->mapnew({idx, val -> val.text})
-    let noic_match = matchbufline('%', '\C\%u17f\%u17f', 1, '$')->mapnew({idx, val -> val.text})
-    let ic_match2 = matchbufline('%', '\c\%u17f\+', 1, '$')->mapnew({idx, val -> val.text})
-    let noic_match2 = matchbufline('%', '\C\%u17f\+', 1, '$')->mapnew({idx, val -> val.text})
-
-    call assert_equal(['ss', 'ſſ'], ic_match, "Ignorecase Regex-engine: " .. &re)
-    call assert_equal(['ſſ'], noic_match, "No-Ignorecase Regex-engine: " .. &re)
-    call assert_equal(['s', 'ss', 'ſſ', 'ſ'], ic_match2, "Ignorecase Regex-engine: " .. &re)
-    call assert_equal(['ſſ','ſ'], noic_match2, "No-Ignorecase Regex-engine: " .. &re)
-  endfor
-  bw!
-endfunc
-
 " vim: shiftwidth=2 sts=2 expandtab
author	zeertzjq <zeertzjq@outlook.com>	2024-04-11 07:40:16 +0800
committer	GitHub <noreply@github.com>	2024-04-11 07:40:16 +0800
commit	d0afb2dc4eb8e70942441b3c9a551dcccd6806cd (patch)
tree	627bdbec7314d38bf17245b0ab0be4bd4e85c419
parent	adb70a351d64129c4e12febf7cbe528ef66c8eb6 (diff)
download	rneovim-d0afb2dc4eb8e70942441b3c9a551dcccd6806cd.tar.gz rneovim-d0afb2dc4eb8e70942441b3c9a551dcccd6806cd.tar.bz2 rneovim-d0afb2dc4eb8e70942441b3c9a551dcccd6806cd.zip