1 files changed, 94 insertions, 139 deletions
diff --git a/src/nvim/regexp.c b/src/nvim/regexp.c
index d76da62c6d..15a701f022 100644
--- a/src/nvim/regexp.c
+++ b/src/nvim/regexp.c
@@ -772,13 +772,9 @@ static int get_equi_class(char_u **pp)
   char_u      *p = *pp;
 
   if (p[1] == '=') {
-    if (has_mbyte)
-      l = (*mb_ptr2len)(p + 2);
+    l = (*mb_ptr2len)(p + 2);
     if (p[l + 2] == '=' && p[l + 3] == ']') {
-      if (has_mbyte)
-        c = mb_ptr2char(p + 2);
-      else
-        c = p[2];
+      c = utf_ptr2char(p + 2);
       *pp += l + 4;
       return c;
     }
@@ -1107,13 +1103,9 @@ static int get_coll_element(char_u **pp)
   char_u      *p = *pp;
 
   if (p[0] != NUL && p[1] == '.') {
-    if (has_mbyte)
-      l = (*mb_ptr2len)(p + 2);
+    l = utfc_ptr2len(p + 2);
     if (p[l + 2] == '.' && p[l + 3] == ']') {
-      if (has_mbyte)
-        c = mb_ptr2char(p + 2);
-      else
-        c = p[2];
+      c = utf_ptr2char(p + 2);
       *pp += l + 4;
       return c;
     }
@@ -1299,10 +1291,7 @@ static regprog_T *bt_regcomp(char_u *expr, int re_flags)
     }
 
     if (OP(scan) == EXACTLY) {
-      if (has_mbyte)
-        r->regstart = (*mb_ptr2char)(OPERAND(scan));
-      else
-        r->regstart = *OPERAND(scan);
+      r->regstart = utf_ptr2char(OPERAND(scan));
     } else if (OP(scan) == BOW
                || OP(scan) == EOW
                || OP(scan) == NOTHING
@@ -1310,10 +1299,7 @@ static regprog_T *bt_regcomp(char_u *expr, int re_flags)
                || OP(scan) == MCLOSE + 0 || OP(scan) == NCLOSE) {
       char_u *regnext_scan = regnext(scan);
       if (OP(regnext_scan) == EXACTLY) {
-        if (has_mbyte)
-          r->regstart = (*mb_ptr2char)(OPERAND(regnext_scan));
-        else
-          r->regstart = *OPERAND(regnext_scan);
+        r->regstart = utf_ptr2char(OPERAND(regnext_scan));
       }
     }
 
@@ -1678,9 +1664,8 @@ static char_u *regpiece(int *flagp)
   case Magic('@'):
   {
     int lop = END;
-    int nr;
+    int64_t nr = getdecchrs();
 
-    nr = getdecchrs();
     switch (no_Magic(getchr())) {
     case '=': lop = MATCH; break;                                 /* \@= */
     case '!': lop = NOMATCH; break;                               /* \@! */
@@ -1820,8 +1805,8 @@ static char_u *regatom(int *flagp)
     if (c == '[')
       goto collection;
 
-  /* "\_x" is character class plus newline */
-  /*FALLTHROUGH*/
+  // "\_x" is character class plus newline
+  FALLTHROUGH;
 
   /*
    * Character classes.
@@ -2101,7 +2086,7 @@ static char_u *regatom(int *flagp)
     case 'u':               /* %uabcd hex 4 */
     case 'U':               /* %U1234abcd hex 8 */
     {
-      int i;
+      int64_t i;
 
       switch (c) {
       case 'd': i = getdecchrs(); break;
@@ -2412,20 +2397,16 @@ collection:
               break;
             }
           } else {
-            if (has_mbyte) {
-              int len;
-
-              /* produce a multibyte character, including any
-               * following composing characters */
-              startc = mb_ptr2char(regparse);
-              len = (*mb_ptr2len)(regparse);
-              if (enc_utf8 && utf_char2len(startc) != len)
-                startc = -1;                    /* composing chars */
-              while (--len >= 0)
-                regc(*regparse++);
-            } else {
-              startc = *regparse++;
-              regc(startc);
+            // produce a multibyte character, including any
+            // following composing characters.
+            startc = utf_ptr2char(regparse);
+            int len = utfc_ptr2len(regparse);
+            if (utf_char2len(startc) != len) {
+              // composing chars
+              startc = -1;
+            }
+            while (--len >= 0) {
+              regc(*regparse++);
             }
           }
         }
@@ -2439,7 +2420,7 @@ collection:
       } else if (reg_strict)
         EMSG2_RET_NULL(_(e_missingbracket), reg_magic > MAGIC_OFF);
     }
-  /* FALLTHROUGH */
+    FALLTHROUGH;
 
   default:
   {
@@ -2557,12 +2538,11 @@ static void regc(int b)
  */
 static void regmbc(int c)
 {
-  if (!has_mbyte && c > 0xff)
-    return;
-  if (regcode == JUST_CALC_SIZE)
-    regsize += (*mb_char2len)(c);
-  else
-    regcode += (*mb_char2bytes)(c, regcode);
+  if (regcode == JUST_CALC_SIZE) {
+    regsize += utf_char2len(c);
+  } else {
+    regcode += utf_char2bytes(c, regcode);
+  }
 }
 
 /*
@@ -2906,17 +2886,13 @@ static int peekchr(void)
        * Next character can never be (made) magic?
        * Then backslashing it won't do anything.
        */
-      if (has_mbyte)
-        curchr = (*mb_ptr2char)(regparse + 1);
-      else
-        curchr = c;
+      curchr = utf_ptr2char(regparse + 1);
     }
     break;
   }
 
   default:
-    if (has_mbyte)
-      curchr = (*mb_ptr2char)(regparse);
+    curchr = utf_ptr2char(regparse);
   }
 
   return curchr;
@@ -2998,9 +2974,9 @@ static void ungetchr(void)
  * The parameter controls the maximum number of input characters. This will be
  * 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence.
  */
-static int gethexchrs(int maxinputlen)
+static int64_t gethexchrs(int maxinputlen)
 {
-  int nr = 0;
+  int64_t nr = 0;
   int c;
   int i;
 
@@ -3022,9 +2998,9 @@ static int gethexchrs(int maxinputlen)
  * Get and return the value of the decimal string immediately after the
  * current position. Return -1 for invalid.  Consumes all digits.
  */
-static int getdecchrs(void)
+static int64_t getdecchrs(void)
 {
-  int nr = 0;
+  int64_t nr = 0;
   int c;
   int i;
 
@@ -3051,9 +3027,9 @@ static int getdecchrs(void)
  *     blahblah\%o210asdf
  *	   before-^  ^-after
  */
-static int getoctchrs(void)
+static int64_t getoctchrs(void)
 {
-  int nr = 0;
+  int64_t nr = 0;
   int c;
   int i;
 
@@ -3077,7 +3053,7 @@ static int getoctchrs(void)
  */
 static int coll_get_char(void)
 {
-  int nr = -1;
+  int64_t nr = -1;
 
   switch (*regparse++) {
   case 'd': nr = getdecchrs(); break;
@@ -3466,12 +3442,7 @@ static long bt_regexec_both(char_u *line,
 
   /* If there is a "must appear" string, look for it. */
   if (prog->regmust != NULL) {
-    int c;
-
-    if (has_mbyte)
-      c = (*mb_ptr2char)(prog->regmust);
-    else
-      c = *prog->regmust;
+    int c = utf_ptr2char(prog->regmust);
     s = line + col;
 
     // This is used very often, esp. for ":global".  Use two versions of
@@ -3502,16 +3473,11 @@ static long bt_regexec_both(char_u *line,
 
   /* Simplest case: Anchored match need be tried only once. */
   if (prog->reganch) {
-    int c;
-
-    if (has_mbyte)
-      c = (*mb_ptr2char)(regline + col);
-    else
-      c = regline[col];
+    int c = utf_ptr2char(regline + col);
     if (prog->regstart == NUL
         || prog->regstart == c
         || (rex.reg_ic
-            && (((enc_utf8 && utf_fold(prog->regstart) == utf_fold(c)))
+            && (utf_fold(prog->regstart) == utf_fold(c)
                 || (c < 255 && prog->regstart < 255
                     && mb_tolower(prog->regstart) == mb_tolower(c))))) {
       retval = regtry(prog, col);
@@ -3686,8 +3652,7 @@ static long regtry(bt_regprog_T *prog, colnr_T col)
 static int reg_prev_class(void)
 {
   if (reginput > regline) {
-    return mb_get_class_tab(reginput - 1 - (*mb_head_off)(regline,
-                                                          reginput - 1),
+    return mb_get_class_tab(reginput - 1 - utf_head_off(regline, reginput - 1),
                             rex.reg_buf->b_chartab);
   }
   return -1;
@@ -3858,12 +3823,10 @@ regmatch (
       } else if (rex.reg_line_lbr && WITH_NL(op) && *reginput == '\n') {
         ADVANCE_REGINPUT();
       } else {
-        if (WITH_NL(op))
+        if (WITH_NL(op)) {
           op -= ADD_NL;
-        if (has_mbyte)
-          c = (*mb_ptr2char)(reginput);
-        else
-          c = *reginput;
+        }
+        c = utf_ptr2char(reginput);
         switch (op) {
         case BOL:
           if (reginput != regline)
@@ -4956,13 +4919,13 @@ regmatch (
                   (colnr_T)STRLEN(regline);
               }
             } else {
-              if (has_mbyte) {
-                rp->rs_un.regsave.rs_u.pos.col -=
-                  (*mb_head_off)(regline, regline
-                                 + rp->rs_un.regsave.rs_u.pos.col - 1) + 1;
-              } else {
-                rp->rs_un.regsave.rs_u.pos.col--;
-              }
+              const char_u *const line =
+                  reg_getline(behind_pos.rs_u.pos.lnum);
+
+              rp->rs_un.regsave.rs_u.pos.col -=
+                  utf_head_off(line,
+                               line + rp->rs_un.regsave.rs_u.pos.col - 1)
+                  + 1;
             }
           } else {
             if (rp->rs_un.regsave.rs_u.ptr == regline) {
@@ -5191,8 +5154,8 @@ regrepeat (
 
   case IDENT:
   case IDENT + ADD_NL:
-    testval = TRUE;
-  /*FALLTHROUGH*/
+    testval = 1;
+    FALLTHROUGH;
   case SIDENT:
   case SIDENT + ADD_NL:
     while (count < maxcount) {
@@ -5218,8 +5181,8 @@ regrepeat (
 
   case KWORD:
   case KWORD + ADD_NL:
-    testval = TRUE;
-  /*FALLTHROUGH*/
+    testval = 1;
+    FALLTHROUGH;
   case SKWORD:
   case SKWORD + ADD_NL:
     while (count < maxcount) {
@@ -5247,8 +5210,8 @@ regrepeat (
 
   case FNAME:
   case FNAME + ADD_NL:
-    testval = TRUE;
-  /*FALLTHROUGH*/
+    testval = 1;
+    FALLTHROUGH;
   case SFNAME:
   case SFNAME + ADD_NL:
     while (count < maxcount) {
@@ -5275,8 +5238,8 @@ regrepeat (
 
   case PRINT:
   case PRINT + ADD_NL:
-    testval = TRUE;
-  /*FALLTHROUGH*/
+    testval = 1;
+    FALLTHROUGH;
   case SPRINT:
   case SPRINT + ADD_NL:
     while (count < maxcount) {
@@ -5454,8 +5417,8 @@ do_class:
 
   case ANYOF:
   case ANYOF + ADD_NL:
-    testval = TRUE;
-  /*FALLTHROUGH*/
+    testval = 1;
+    FALLTHROUGH;
 
   case ANYBUT:
   case ANYBUT + ADD_NL:
@@ -5473,8 +5436,8 @@ do_class:
         }
       } else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p))) {
         scan++;
-      } else if (has_mbyte && (len = (*mb_ptr2len)(scan)) > 1) {
-        if ((cstrchr(opnd, (*mb_ptr2char)(scan)) == NULL) == testval) {
+      } else if ((len = utfc_ptr2len(scan)) > 1) {
+        if ((cstrchr(opnd, utf_ptr2char(scan)) == NULL) == testval) {
           break;
         }
         scan += len;
@@ -6782,40 +6745,36 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest,
           }
             c = *src++;
           }
-        } else if (has_mbyte)
-          c = mb_ptr2char(src - 1);
-
-        /* Write to buffer, if copy is set. */
-        if (func_one != (fptr_T)NULL)
-          /* Turbo C complains without the typecast */
+        } else {
+          c = utf_ptr2char(src - 1);
+        }
+        // Write to buffer, if copy is set.
+        if (func_one != NULL) {
           func_one = (fptr_T)(func_one(&cc, c));
-        else if (func_all != (fptr_T)NULL)
-          /* Turbo C complains without the typecast */
+        } else if (func_all != NULL) {
           func_all = (fptr_T)(func_all(&cc, c));
-        else     /* just copy */
+        } else {
+          // just copy
           cc = c;
+        }
 
-        if (has_mbyte) {
-          int totlen = mb_ptr2len(src - 1);
+        int totlen = utfc_ptr2len(src - 1);
 
-          if (copy)
-            mb_char2bytes(cc, dst);
-          dst += mb_char2len(cc) - 1;
-          if (enc_utf8) {
-            int clen = utf_ptr2len(src - 1);
+        if (copy) {
+          utf_char2bytes(cc, dst);
+        }
+        dst += utf_char2len(cc) - 1;
+        int clen = utf_ptr2len(src - 1);
 
-            /* If the character length is shorter than "totlen", there
-             * are composing characters; copy them as-is. */
-            if (clen < totlen) {
-              if (copy)
-                memmove(dst + 1, src - 1 + clen,
-                    (size_t)(totlen - clen));
-              dst += totlen - clen;
-            }
+        // If the character length is shorter than "totlen", there
+        // are composing characters; copy them as-is.
+        if (clen < totlen) {
+          if (copy) {
+            memmove(dst + 1, src - 1 + clen, (size_t)(totlen - clen));
           }
-          src += totlen - 1;
-        } else if (copy)
-          *dst = cc;
+          dst += totlen - clen;
+        }
+        src += totlen - 1;
         dst++;
       } else {
         if (REG_MULTI) {
@@ -6878,10 +6837,7 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest,
                 }
                 dst += 2;
               } else {
-                if (has_mbyte)
-                  c = mb_ptr2char(s);
-                else
-                  c = *s;
+                c = utf_ptr2char(s);
 
                 if (func_one != (fptr_T)NULL)
                   /* Turbo C complains without the typecast */
@@ -6895,20 +6851,19 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest,
                 if (has_mbyte) {
                   int l;
 
-                  /* Copy composing characters separately, one
-                   * at a time. */
-                  if (enc_utf8)
-                    l = utf_ptr2len(s) - 1;
-                  else
-                    l = mb_ptr2len(s) - 1;
+                  // Copy composing characters separately, one
+                  // at a time.
+                  l = utf_ptr2len(s) - 1;
 
                   s += l;
                   len -= l;
-                  if (copy)
-                    mb_char2bytes(cc, dst);
-                  dst += mb_char2len(cc) - 1;
-                } else if (copy)
+                  if (copy) {
+                    utf_char2bytes(cc, dst);
+                  }
+                  dst += utf_char2len(cc) - 1;
+                } else if (copy) {
                   *dst = cc;
+                }
                 dst++;
               }