Merge #6479 from bfredl/tolower

remove vim_tolower/etc functions with broken locale-dependent behavior
author: Justin M. Keyes <justinkz@gmail.com> 2017-04-10 15:23:44 +0200
committer: GitHub <noreply@github.com> 2017-04-10 15:23:44 +0200
commit: fec53f0bdf5b871c1f6ca818d5a5c52118f5c266 (patch)
tree: fe6f9dc4ba3788fe6fea8ac599974904861257c3 /src
parent: dd7f41e5a04c14255893e8b986e42e4c62902e1b (diff)
parent: c1cf03398143f4dc0ac9155988edad349d24deca (diff)
download: rneovim-fec53f0bdf5b871c1f6ca818d5a5c52118f5c266.tar.gz
rneovim-fec53f0bdf5b871c1f6ca818d5a5c52118f5c266.tar.bz2
rneovim-fec53f0bdf5b871c1f6ca818d5a5c52118f5c266.zip
18 files changed, 332 insertions, 247 deletions
diff --git a/src/nvim/charset.c b/src/nvim/charset.c
index 99d3e2dd88..645139f696 100644
--- a/src/nvim/charset.c
+++ b/src/nvim/charset.c
@@ -212,8 +212,8 @@ int buf_init_chartab(buf_T *buf, int global)
         // work properly when 'encoding' is "latin1" and the locale is
         // "C".
         if (!do_isalpha
-            || vim_islower(c)
-            || vim_isupper(c)
+            || mb_islower(c)
+            || mb_isupper(c)
             || (p_altkeymap && (F_isalpha(c) || F_isdigit(c)))) {
           if (i == 0) {
             // (re)set ID flag
@@ -417,11 +417,11 @@ char_u* str_foldcase(char_u *str, int orglen, char_u *buf, int buflen)
   while (STR_CHAR(i) != NUL) {
     int c = utf_ptr2char(STR_PTR(i));
     int olen = utf_ptr2len(STR_PTR(i));
-    int lc = utf_tolower(c);
+    int lc = mb_tolower(c);
 
     // Only replace the character when it is not an invalid
     // sequence (ASCII character or more than one byte) and
-    // utf_tolower() doesn't return the original character.
+    // mb_tolower() doesn't return the original character.
     if (((c < 0x80) || (olen > 1)) && (c != lc)) {
       int nlen = utf_char2len(lc);
 
@@ -1506,67 +1506,6 @@ char_u* skiptohex(char_u *q)
   return p;
 }
 
-// Vim's own character class functions.  These exist because many library
-// islower()/toupper() etc. do not work properly: they crash when used with
-// invalid values or can't handle latin1 when the locale is C.
-// Speed is most important here.
-
-/// Check that the character is lower-case
-///
-/// @param  c  character to check
-bool vim_islower(int c)
-  FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT
-{
-  if (c <= '@') {
-    return false;
-  }
-
-  if (c >= 0x80) {
-    return utf_islower(c);
-  }
-  return islower(c);
-}
-
-/// Check that the character is upper-case
-///
-/// @param  c  character to check
-bool vim_isupper(int c)
-  FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT
-{
-  if (c <= '@') {
-    return false;
-  }
-
-  if (c >= 0x80) {
-      return utf_isupper(c);
-  }
-  return isupper(c);
-}
-
-int vim_toupper(int c)
-{
-  if (c <= '@') {
-    return c;
-  }
-
-  if (c >= 0x80) {
-    return utf_toupper(c);
-  }
-  return TOUPPER_LOC(c);
-}
-
-int vim_tolower(int c)
-{
-  if (c <= '@') {
-    return c;
-  }
-
-  if (c >= 0x80) {
-    return utf_tolower(c);
-  }
-  return TOLOWER_LOC(c);
-}
-
 /// Skip over text until ' ' or '\t' or NUL
 ///
 /// @param[in]  p  Text to skip over.
diff --git a/src/nvim/edit.c b/src/nvim/edit.c
index b35504908e..fe00027dec 100644
--- a/src/nvim/edit.c
+++ b/src/nvim/edit.c
@@ -2037,12 +2037,12 @@ int ins_compl_add_infercase(char_u *str, int len, int icase, char_u *fname, int
         } else {
           c = *(p++);
         }
-        if (vim_islower(c)) {
+        if (mb_islower(c)) {
           has_lower = true;
-          if (vim_isupper(wca[i])) {
+          if (mb_isupper(wca[i])) {
             // Rule 1 is satisfied.
             for (i = actual_compl_length; i < actual_len; i++) {
-              wca[i] = vim_tolower(wca[i]);
+              wca[i] = mb_tolower(wca[i]);
             }
             break;
           }
@@ -2062,14 +2062,14 @@ int ins_compl_add_infercase(char_u *str, int len, int icase, char_u *fname, int
         } else {
           c = *(p++);
         }
-        if (was_letter && vim_isupper(c) && vim_islower(wca[i])) {
+        if (was_letter && mb_isupper(c) && mb_islower(wca[i])) {
           // Rule 2 is satisfied.
           for (i = actual_compl_length; i < actual_len; i++) {
-            wca[i] = vim_toupper(wca[i]);
+            wca[i] = mb_toupper(wca[i]);
           }
           break;
         }
-        was_letter = vim_islower(c) || vim_isupper(c);
+        was_letter = mb_islower(c) || mb_isupper(c);
       }
     }
 
@@ -2082,10 +2082,10 @@ int ins_compl_add_infercase(char_u *str, int len, int icase, char_u *fname, int
         } else {
           c = *(p++);
         }
-        if (vim_islower(c)) {
-          wca[i] = vim_tolower(wca[i]);
-        } else if (vim_isupper(c)) {
-          wca[i] = vim_toupper(wca[i]);
+        if (mb_islower(c)) {
+          wca[i] = mb_tolower(wca[i]);
+        } else if (mb_isupper(c)) {
+          wca[i] = mb_toupper(wca[i]);
         }
       }
     }
@@ -2302,9 +2302,10 @@ static void ins_compl_longest_match(compl_T *match)
         c1 = *p;
         c2 = *s;
       }
-      if (match->cp_icase ? (vim_tolower(c1) != vim_tolower(c2))
-          : (c1 != c2))
+      if (match->cp_icase ? (mb_tolower(c1) != mb_tolower(c2))
+          : (c1 != c2)) {
         break;
+      }
       if (has_mbyte) {
         mb_ptr_adv(p);
         mb_ptr_adv(s);
diff --git a/src/nvim/eval.c b/src/nvim/eval.c
index 124d6acfe9..0663e19b9a 100644
--- a/src/nvim/eval.c
+++ b/src/nvim/eval.c
@@ -16791,30 +16791,9 @@ void timer_teardown(void)
  */
 static void f_tolower(typval_T *argvars, typval_T *rettv, FunPtr fptr)
 {
-  char_u *p = (char_u *)xstrdup(tv_get_string(&argvars[0]));
   rettv->v_type = VAR_STRING;
-  rettv->vval.v_string = p;
-
-  while (*p != NUL) {
-    int l;
-
-    if (enc_utf8) {
-      int c, lc;
-
-      c = utf_ptr2char(p);
-      lc = utf_tolower(c);
-      l = utf_ptr2len(p);
-      /* TODO: reallocate string when byte count changes. */
-      if (utf_char2len(lc) == l)
-        utf_char2bytes(lc, p);
-      p += l;
-    } else if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
-      p += l;                 /* skip multi-byte character */
-    else {
-      *p = TOLOWER_LOC(*p);         /* note that tolower() can be a macro */
-      ++p;
-    }
-  }
+  rettv->vval.v_string = (char_u *)strcase_save(tv_get_string(&argvars[0]),
+                                                false);
 }
 
 /*
@@ -16823,7 +16802,8 @@ static void f_tolower(typval_T *argvars, typval_T *rettv, FunPtr fptr)
 static void f_toupper(typval_T *argvars, typval_T *rettv, FunPtr fptr)
 {
   rettv->v_type = VAR_STRING;
-  rettv->vval.v_string = (char_u *)strup_save(tv_get_string(&argvars[0]));
+  rettv->vval.v_string = (char_u *)strcase_save(tv_get_string(&argvars[0]),
+                                                true);
 }
 
 /*
diff --git a/src/nvim/ex_getln.c b/src/nvim/ex_getln.c
index 8810204c03..9d74f554ba 100644
--- a/src/nvim/ex_getln.c
+++ b/src/nvim/ex_getln.c
@@ -1231,7 +1231,7 @@ static int command_line_handle_key(CommandLineState *s)
         // command line has no uppercase characters, convert
         // the character to lowercase
         if (p_ic && p_scs && !pat_has_uppercase(ccline.cmdbuff)) {
-          s->c = vim_tolower(s->c);
+          s->c = mb_tolower(s->c);
         }
 
         if (s->c != NUL) {
@@ -3018,7 +3018,7 @@ ExpandOne (
                       || xp->xp_context == EXPAND_FILES
                       || xp->xp_context == EXPAND_SHELLCMD
                       || xp->xp_context == EXPAND_BUFFERS)) {
-          if (vim_tolower(c0) != vim_tolower(ci)) {
+          if (mb_tolower(c0) != mb_tolower(ci)) {
             break;
           }
         } else if (c0 != ci) {
diff --git a/src/nvim/file_search.c b/src/nvim/file_search.c
index 9592235905..db745bdd15 100644
--- a/src/nvim/file_search.c
+++ b/src/nvim/file_search.c
@@ -1057,7 +1057,7 @@ static bool ff_wc_equal(char_u *s1, char_u *s2)
     c1 = PTR2CHAR(s1 + i);
     c2 = PTR2CHAR(s2 + j);
 
-    if ((p_fic ? vim_tolower(c1) != vim_tolower(c2) : c1 != c2)
+    if ((p_fic ? mb_tolower(c1) != mb_tolower(c2) : c1 != c2)
         && (prev1 != '*' || prev2 != '*')) {
       return false;
     }
diff --git a/src/nvim/macros.h b/src/nvim/macros.h
index a8df6322cf..22fd48de9d 100644
--- a/src/nvim/macros.h
+++ b/src/nvim/macros.h
@@ -62,7 +62,7 @@
  * toupper() and tolower() that use the current locale.
  * Careful: Only call TOUPPER_LOC() and TOLOWER_LOC() with a character in the
  * range 0 - 255.  toupper()/tolower() on some systems can't handle others.
- * Note: It is often better to use vim_tolower() and vim_toupper(), because many
+ * Note: It is often better to use mb_tolower() and mb_toupper(), because many
  * toupper() and tolower() implementations only work for ASCII.
  */
 #define TOUPPER_LOC toupper
diff --git a/src/nvim/mbyte.c b/src/nvim/mbyte.c
index 460528b85f..b18459a2b5 100644
--- a/src/nvim/mbyte.c
+++ b/src/nvim/mbyte.c
@@ -1174,11 +1174,14 @@ int utf_fold(int a)
   return utf_convert(a, foldCase, ARRAY_SIZE(foldCase));
 }
 
-/*
- * Return the upper-case equivalent of "a", which is a UCS-4 character.  Use
- * simple case folding.
- */
-int utf_toupper(int a)
+// Vim's own character class functions.  These exist because many library
+// islower()/toupper() etc. do not work properly: they crash when used with
+// invalid values or can't handle latin1 when the locale is C.
+// Speed is most important here.
+
+/// Return the upper-case equivalent of "a", which is a UCS-4 character.  Use
+/// simple case folding.
+int mb_toupper(int a)
 {
   /* If 'casemap' contains "keepascii" use ASCII style toupper(). */
   if (a < 128 && (cmp_flags & CMP_KEEPASCII))
@@ -1198,17 +1201,15 @@ int utf_toupper(int a)
   return utf_convert(a, toUpper, ARRAY_SIZE(toUpper));
 }
 
-bool utf_islower(int a)
+bool mb_islower(int a)
 {
-  /* German sharp s is lower case but has no upper case equivalent. */
-  return (utf_toupper(a) != a) || a == 0xdf;
+  // German sharp s is lower case but has no upper case equivalent.
+  return (mb_toupper(a) != a) || a == 0xdf;
 }
 
-/*
- * Return the lower-case equivalent of "a", which is a UCS-4 character.  Use
- * simple case folding.
- */
-int utf_tolower(int a)
+/// Return the lower-case equivalent of "a", which is a UCS-4 character.  Use
+/// simple case folding.
+int mb_tolower(int a)
 {
   /* If 'casemap' contains "keepascii" use ASCII style tolower(). */
   if (a < 128 && (cmp_flags & CMP_KEEPASCII))
@@ -1228,9 +1229,9 @@ int utf_tolower(int a)
   return utf_convert(a, toLower, ARRAY_SIZE(toLower));
 }
 
-bool utf_isupper(int a)
+bool mb_isupper(int a)
 {
-  return utf_tolower(a) != a;
+  return mb_tolower(a) != a;
 }
 
 static int utf_strnicmp(const char_u *s1, const char_u *s2, size_t n1,
diff --git a/src/nvim/message.c b/src/nvim/message.c
index 1d3609291a..3e4a1e10b6 100644
--- a/src/nvim/message.c
+++ b/src/nvim/message.c
@@ -2730,8 +2730,8 @@ do_dialog (
         break;
       }
 
-      /* Make the character lowercase, as chars in "hotkeys" are. */
-      c = vim_tolower(c);
+      // Make the character lowercase, as chars in "hotkeys" are.
+      c = mb_tolower(c);
       retval = 1;
       for (i = 0; hotkeys[i]; ++i) {
         if (has_mbyte) {
@@ -2777,7 +2777,7 @@ copy_char (
 
   if (has_mbyte) {
     if (lowercase) {
-      c = vim_tolower((*mb_ptr2char)(from));
+      c = mb_tolower((*mb_ptr2char)(from));
       return (*mb_char2bytes)(c, to);
     } else {
       len = (*mb_ptr2len)(from);
diff --git a/src/nvim/ops.c b/src/nvim/ops.c
index 68ef27222c..f11d6b69b2 100644
--- a/src/nvim/ops.c
+++ b/src/nvim/ops.c
@@ -1956,16 +1956,18 @@ int swapchar(int op_type, pos_T *pos)
   if (enc_dbcs != 0 && c >= 0x100)      /* No lower/uppercase letter */
     return FALSE;
   nc = c;
-  if (vim_islower(c)) {
-    if (op_type == OP_ROT13)
+  if (mb_islower(c)) {
+    if (op_type == OP_ROT13) {
       nc = ROT13(c, 'a');
-    else if (op_type != OP_LOWER)
-      nc = vim_toupper(c);
-  } else if (vim_isupper(c)) {
-    if (op_type == OP_ROT13)
+    } else if (op_type != OP_LOWER) {
+      nc = mb_toupper(c);
+    }
+  } else if (mb_isupper(c)) {
+    if (op_type == OP_ROT13) {
       nc = ROT13(c, 'A');
-    else if (op_type != OP_UPPER)
-      nc = vim_tolower(c);
+    } else if (op_type != OP_UPPER) {
+      nc = mb_tolower(c);
+    }
   }
   if (nc != c) {
     if (enc_utf8 && (c >= 0x80 || nc >= 0x80)) {
@@ -3327,10 +3329,11 @@ void ex_display(exarg_T *eap)
 
     get_clipboard(name, &yb, true);
 
-    if (name == vim_tolower(redir_reg)
-        || (redir_reg == '"' && yb == y_previous))
-      continue;             /* do not list register being written to, the
-                             * pointer can be freed */
+    if (name == mb_tolower(redir_reg)
+        || (redir_reg == '"' && yb == y_previous)) {
+      continue;  // do not list register being written to, the
+                 // pointer can be freed
+    }
 
     if (yb->y_array != NULL) {
       msg_putchar('\n');
diff --git a/src/nvim/path.c b/src/nvim/path.c
index 6bf42ed2fa..205fc2ed62 100644
--- a/src/nvim/path.c
+++ b/src/nvim/path.c
@@ -1853,7 +1853,7 @@ int pathcmp(const char *p, const char *q, int maxlen)
       break;
     }
 
-    if ((p_fic ? vim_toupper(c1) != vim_toupper(c2) : c1 != c2)
+    if ((p_fic ? mb_toupper(c1) != mb_toupper(c2) : c1 != c2)
 #ifdef BACKSLASH_IN_FILENAME
         /* consider '/' and '\\' to be equal */
         && !((c1 == '/' && c2 == '\\')
@@ -1864,8 +1864,8 @@ int pathcmp(const char *p, const char *q, int maxlen)
         return -1;
       if (vim_ispathsep(c2))
         return 1;
-      return p_fic ? vim_toupper(c1) - vim_toupper(c2)
-             : c1 - c2;         /* no match */
+      return p_fic ? mb_toupper(c1) - mb_toupper(c2)
+                   : c1 - c2;  // no match
     }
 
     i += MB_PTR2LEN((char_u *)p + i);
diff --git a/src/nvim/regexp.c b/src/nvim/regexp.c
index 9baa53d2a2..4b5e17b00b 100644
--- a/src/nvim/regexp.c
+++ b/src/nvim/regexp.c
@@ -2350,7 +2350,7 @@ collection:
               break;
             case CLASS_LOWER:
               for (cu = 1; cu <= 255; cu++) {
-                if (vim_islower(cu) && cu != 170 && cu != 186) {
+                if (mb_islower(cu) && cu != 170 && cu != 186) {
                   regmbc(cu);
                 }
               }
@@ -2376,7 +2376,7 @@ collection:
               break;
             case CLASS_UPPER:
               for (cu = 1; cu <= 255; cu++) {
-                if (vim_isupper(cu)) {
+                if (mb_isupper(cu)) {
                   regmbc(cu);
                 }
               }
@@ -3474,7 +3474,7 @@ static long bt_regexec_both(char_u *line,
         || (ireg_ic
             && (((enc_utf8 && utf_fold(prog->regstart) == utf_fold(c)))
                 || (c < 255 && prog->regstart < 255
-                    && vim_tolower(prog->regstart) == vim_tolower(c))))) {
+                    && mb_tolower(prog->regstart) == mb_tolower(c))))) {
       retval = regtry(prog, col);
     } else {
       retval = 0;
@@ -4155,7 +4155,7 @@ regmatch (
           if (*opnd != *reginput
               && (!ireg_ic
                   || (!enc_utf8
-                      && vim_tolower(*opnd) != vim_tolower(*reginput)))) {
+                      && mb_tolower(*opnd) != mb_tolower(*reginput)))) {
             status = RA_NOMATCH;
           } else if (*opnd == NUL) {
             // match empty string always works; happens when "~" is
@@ -4573,12 +4573,14 @@ regmatch (
           if (OP(next) == EXACTLY) {
             rst.nextb = *OPERAND(next);
             if (ireg_ic) {
-              if (vim_isupper(rst.nextb))
-                rst.nextb_ic = vim_tolower(rst.nextb);
-              else
-                rst.nextb_ic = vim_toupper(rst.nextb);
-            } else
+              if (mb_isupper(rst.nextb)) {
+                rst.nextb_ic = mb_tolower(rst.nextb);
+              } else {
+                rst.nextb_ic = mb_toupper(rst.nextb);
+              }
+            } else {
               rst.nextb_ic = rst.nextb;
+            }
           } else {
             rst.nextb = NUL;
             rst.nextb_ic = NUL;
@@ -5339,8 +5341,8 @@ do_class:
      * would have been used for it.  It does handle single-byte
      * characters, such as latin1. */
     if (ireg_ic) {
-      cu = vim_toupper(*opnd);
-      cl = vim_tolower(*opnd);
+      cu = mb_toupper(*opnd);
+      cl = mb_tolower(*opnd);
       while (count < maxcount && (*scan == cu || *scan == cl)) {
         count++;
         scan++;
@@ -6312,14 +6314,15 @@ static char_u *cstrchr(char_u *s, int c)
   /* tolower() and toupper() can be slow, comparing twice should be a lot
    * faster (esp. when using MS Visual C++!).
    * For UTF-8 need to use folded case. */
-  if (enc_utf8 && c > 0x80)
+  if (c > 0x80) {
     cc = utf_fold(c);
-  else if (vim_isupper(c))
-    cc = vim_tolower(c);
-  else if (vim_islower(c))
-    cc = vim_toupper(c);
-  else
+  } else if (mb_isupper(c)) {
+    cc = mb_tolower(c);
+  } else if (mb_islower(c)) {
+    cc = mb_toupper(c);
+  } else {
     return vim_strchr(s, c);
+  }
 
   if (has_mbyte) {
     for (p = s; *p != NUL; p += (*mb_ptr2len)(p)) {
@@ -6348,28 +6351,28 @@ static char_u *cstrchr(char_u *s, int c)
 
 static fptr_T do_upper(int *d, int c)
 {
-  *d = vim_toupper(c);
+  *d = mb_toupper(c);
 
   return (fptr_T)NULL;
 }
 
 static fptr_T do_Upper(int *d, int c)
 {
-  *d = vim_toupper(c);
+  *d = mb_toupper(c);
 
   return (fptr_T)do_Upper;
 }
 
 static fptr_T do_lower(int *d, int c)
 {
-  *d = vim_tolower(c);
+  *d = mb_tolower(c);
 
   return (fptr_T)NULL;
 }
 
 static fptr_T do_Lower(int *d, int c)
 {
-  *d = vim_tolower(c);
+  *d = mb_tolower(c);
 
   return (fptr_T)do_Lower;
 }
diff --git a/src/nvim/regexp_nfa.c b/src/nvim/regexp_nfa.c
index 5b49ab38f0..caf26fdd35 100644
--- a/src/nvim/regexp_nfa.c
+++ b/src/nvim/regexp_nfa.c
@@ -4373,7 +4373,7 @@ static int check_char_class(int class, int c)
       return OK;
     break;
   case NFA_CLASS_LOWER:
-    if (vim_islower(c) && c != 170 && c != 186) {
+    if (mb_islower(c) && c != 170 && c != 186) {
       return OK;
     }
     break;
@@ -4391,8 +4391,9 @@ static int check_char_class(int class, int c)
       return OK;
     break;
   case NFA_CLASS_UPPER:
-    if (vim_isupper(c))
+    if (mb_isupper(c)) {
       return OK;
+    }
     break;
   case NFA_CLASS_XDIGIT:
     if (ascii_isxdigit(c))
@@ -4892,7 +4893,7 @@ static long find_match_text(colnr_T startcol, int regstart, char_u *match_text)
       int c2_len = PTR2LEN(s2);
       int c2 = PTR2CHAR(s2);
 
-      if ((c1 != c2 && (!ireg_ic || vim_tolower(c1) != vim_tolower(c2)))
+      if ((c1 != c2 && (!ireg_ic || mb_tolower(c1) != mb_tolower(c2)))
           || c1_len != c2_len) {
         match = false;
         break;
@@ -5585,22 +5586,24 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
               break;
             }
             if (ireg_ic) {
-              int curc_low = vim_tolower(curc);
-              int done = FALSE;
+              int curc_low = mb_tolower(curc);
+              int done = false;
 
-              for (; c1 <= c2; ++c1)
-                if (vim_tolower(c1) == curc_low) {
+              for (; c1 <= c2; c1++) {
+                if (mb_tolower(c1) == curc_low) {
                   result = result_if_matched;
                   done = TRUE;
                   break;
                 }
-              if (done)
+              }
+              if (done) {
                 break;
+              }
             }
           } else if (state->c < 0 ? check_char_class(state->c, curc)
                      : (curc == state->c
-                        || (ireg_ic && vim_tolower(curc)
-                            == vim_tolower(state->c)))) {
+                        || (ireg_ic && mb_tolower(curc)
+                            == mb_tolower(state->c)))) {
             result = result_if_matched;
             break;
           }
@@ -6003,8 +6006,9 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
 #endif
         result = (c == curc);
 
-        if (!result && ireg_ic)
-          result = vim_tolower(c) == vim_tolower(curc);
+        if (!result && ireg_ic) {
+          result = mb_tolower(c) == mb_tolower(curc);
+        }
 
         // If ireg_icombine is not set only skip over the character
         // itself.  When it is set skip over composing characters.
@@ -6152,8 +6156,8 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
             // Checking if the required start character matches is
             // cheaper than adding a state that won't match.
             c = PTR2CHAR(reginput + clen);
-            if (c != prog->regstart && (!ireg_ic || vim_tolower(c)
-                                        != vim_tolower(prog->regstart))) {
+            if (c != prog->regstart && (!ireg_ic || mb_tolower(c)
+                                        != mb_tolower(prog->regstart))) {
 #ifdef REGEXP_DEBUG
               fprintf(log_fd,
                   "  Skipping start state, regstart does not match\n");
diff --git a/src/nvim/search.c b/src/nvim/search.c
index c5c92b41c5..91a558045f 100644
--- a/src/nvim/search.c
+++ b/src/nvim/search.c
@@ -335,23 +335,26 @@ int pat_has_uppercase(char_u *pat)
   while (*p != NUL) {
     int l;
 
-    if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1) {
-      if (enc_utf8 && utf_isupper(utf_ptr2char(p)))
-        return TRUE;
+    if ((l = mb_ptr2len(p)) > 1) {
+      if (mb_isupper(utf_ptr2char(p))) {
+        return true;
+      }
       p += l;
     } else if (*p == '\\') {
-      if (p[1] == '_' && p[2] != NUL)        /* skip "\_X" */
+      if (p[1] == '_' && p[2] != NUL) {  // skip "\_X"
         p += 3;
-      else if (p[1] == '%' && p[2] != NUL)        /* skip "\%X" */
+      } else if (p[1] == '%' && p[2] != NUL) {  // skip "\%X"
         p += 3;
-      else if (p[1] != NUL)        /* skip "\X" */
+      } else if (p[1] != NUL) {  // skip "\X"
         p += 2;
-      else
+      } else {
         p += 1;
-    } else if (vim_isupper(*p))
-      return TRUE;
-    else
-      ++p;
+      }
+    } else if (mb_isupper(*p)) {
+      return true;
+    } else {
+      p++;
+    }
   }
   return FALSE;
 }
diff --git a/src/nvim/spell.c b/src/nvim/spell.c
index d4f49bffb2..18febda1d8 100644
--- a/src/nvim/spell.c
+++ b/src/nvim/spell.c
@@ -2526,8 +2526,7 @@ void clear_spell_chartab(spelltab_T *sp)
   }
 }
 
-// Init the chartab used for spelling.  Only depends on 'encoding'.
-// Called once while starting up and when 'encoding' changes.
+// Init the chartab used for spelling. Called once while starting up.
 // The default is to use isalpha(), but the spell file should define the word
 // characters to make it possible that 'encoding' differs from the current
 // locale.  For utf-8 we don't use isalpha() but our own functions.
@@ -2537,36 +2536,17 @@ void init_spell_chartab(void)
 
   did_set_spelltab = false;
   clear_spell_chartab(&spelltab);
-  if (enc_dbcs) {
-    // DBCS: assume double-wide characters are word characters.
-    for (i = 128; i <= 255; ++i)
-      if (MB_BYTE2LEN(i) == 2)
-        spelltab.st_isw[i] = true;
-  } else if (enc_utf8)   {
-    for (i = 128; i < 256; ++i) {
-      int f = utf_fold(i);
-      int u = utf_toupper(i);
-
-      spelltab.st_isu[i] = utf_isupper(i);
-      spelltab.st_isw[i] = spelltab.st_isu[i] || utf_islower(i);
-      // The folded/upper-cased value is different between latin1 and
-      // utf8 for 0xb5, causing E763 for no good reason.  Use the latin1
-      // value for utf-8 to avoid this.
-      spelltab.st_fold[i] = (f < 256) ? f : i;
-      spelltab.st_upper[i] = (u < 256) ? u : i;
-    }
-  } else {
-    // Rough guess: use locale-dependent library functions.
-    for (i = 128; i < 256; ++i) {
-      if (vim_isupper(i)) {
-        spelltab.st_isw[i] = true;
-        spelltab.st_isu[i] = true;
-        spelltab.st_fold[i] = vim_tolower(i);
-      } else if (vim_islower(i))   {
-        spelltab.st_isw[i] = true;
-        spelltab.st_upper[i] = vim_toupper(i);
-      }
-    }
+  for (i = 128; i < 256; i++) {
+    int f = utf_fold(i);
+    int u = mb_toupper(i);
+
+    spelltab.st_isu[i] = mb_isupper(i);
+    spelltab.st_isw[i] = spelltab.st_isu[i] || mb_islower(i);
+    // The folded/upper-cased value is different between latin1 and
+    // utf8 for 0xb5, causing E763 for no good reason.  Use the latin1
+    // value for utf-8 to avoid this.
+    spelltab.st_fold[i] = (f < 256) ? f : i;
+    spelltab.st_upper[i] = (u < 256) ? u : i;
   }
 }
 
diff --git a/src/nvim/spell_defs.h b/src/nvim/spell_defs.h
index c54a7f5390..ddd54c724e 100644
--- a/src/nvim/spell_defs.h
+++ b/src/nvim/spell_defs.h
@@ -265,11 +265,11 @@ typedef struct trystate_S {
                          : (c) < \
                          256 ? (int)spelltab.st_fold[c] : (int)towlower(c))
 
-#define SPELL_TOUPPER(c) (enc_utf8 && (c) >= 128 ? utf_toupper(c) \
+#define SPELL_TOUPPER(c) (enc_utf8 && (c) >= 128 ? mb_toupper(c) \
                           : (c) < \
                           256 ? (int)spelltab.st_upper[c] : (int)towupper(c))
 
-#define SPELL_ISUPPER(c) (enc_utf8 && (c) >= 128 ? utf_isupper(c) \
+#define SPELL_ISUPPER(c) (enc_utf8 && (c) >= 128 ? mb_isupper(c) \
                           : (c) < 256 ? spelltab.st_isu[c] : iswupper(c))
 
 // First language that is loaded, start of the linked list of loaded
diff --git a/src/nvim/strings.c b/src/nvim/strings.c
index 5dcffe00e0..87e066d80a 100644
--- a/src/nvim/strings.c
+++ b/src/nvim/strings.c
@@ -291,14 +291,15 @@ void vim_strup(char_u *p)
   }
 }
 
-/// Make given string all upper-case
+/// Make given string all upper-case or all lower-case
 ///
-/// Handels multi-byte characters as good as possible.
+/// Handles multi-byte characters as well as possible.
 ///
 /// @param[in]  orig  Input string.
+/// @param[in]  upper If true make uppercase, otherwise lowercase
 ///
 /// @return [allocated] upper-cased string.
-char *strup_save(const char *const orig)
+char *strcase_save(const char *const orig, bool upper)
   FUNC_ATTR_NONNULL_RET FUNC_ATTR_MALLOC FUNC_ATTR_NONNULL_ALL
 {
   char *res = xstrdup(orig);
@@ -307,33 +308,25 @@ char *strup_save(const char *const orig)
   while (*p != NUL) {
     int l;
 
-    if (enc_utf8) {
-      int c = utf_ptr2char((const char_u *)p);
-      int uc = utf_toupper(c);
-
-      // Reallocate string when byte count changes.  This is rare,
-      // thus it's OK to do another malloc()/free().
-      l = utf_ptr2len((const char_u *)p);
-      int newl = utf_char2len(uc);
-      if (newl != l) {
-        // TODO(philix): use xrealloc() in strup_save()
-        char *s = xmalloc(STRLEN(res) + (size_t)(1 + newl - l));
-        memcpy(s, res, (size_t)(p - res));
-        STRCPY(s + (p - res) + newl, p + l);
-        p = s + (p - res);
-        xfree(res);
-        res = s;
-      }
-
-      utf_char2bytes(uc, (char_u *)p);
-      p += newl;
-    } else if (has_mbyte && (l = (*mb_ptr2len)((const char_u *)p)) > 1) {
-      p += l;  // Skip multi-byte character.
-    } else {
-      // note that toupper() can be a macro
-      *p = (char)(uint8_t)TOUPPER_LOC(*p);
-      p++;
+    int c = utf_ptr2char((const char_u *)p);
+    int uc = upper ? mb_toupper(c) : mb_tolower(c);
+
+    // Reallocate string when byte count changes.  This is rare,
+    // thus it's OK to do another malloc()/free().
+    l = utf_ptr2len((const char_u *)p);
+    int newl = utf_char2len(uc);
+    if (newl != l) {
+      // TODO(philix): use xrealloc() in strcase_save()
+      char *s = xmalloc(STRLEN(res) + (size_t)(1 + newl - l));
+      memcpy(s, res, (size_t)(p - res));
+      STRCPY(s + (p - res) + newl, p + l);
+      p = s + (p - res);
+      xfree(res);
+      res = s;
     }
+
+    utf_char2bytes(uc, (char_u *)p);
+    p += newl;
   }
 
   return res;
diff --git a/src/nvim/testdir/test_functions.vim b/src/nvim/testdir/test_functions.vim
index 81cb6314ce..3c258299c1 100644
--- a/src/nvim/testdir/test_functions.vim
+++ b/src/nvim/testdir/test_functions.vim
@@ -29,3 +29,147 @@ func Test_setbufvar_options()
   bwipe!
 endfunc
 
+func Test_tolower()
+  call assert_equal("", tolower(""))
+
+  " Test with all printable ASCII characters.
+  call assert_equal(' !"#$%&''()*+,-./0123456789:;<=>?@abcdefghijklmnopqrstuvwxyz[\]^_`abcdefghijklmnopqrstuvwxyz{|}~',
+          \ tolower(' !"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'))
+
+  if !has('multi_byte')
+    return
+  endif
+
+  " Test with a few uppercase diacritics.
+  call assert_equal("aàáâãäåāăąǎǟǡả", tolower("AÀÁÂÃÄÅĀĂĄǍǞǠẢ"))
+  call assert_equal("bḃḇ", tolower("BḂḆ"))
+  call assert_equal("cçćĉċč", tolower("CÇĆĈĊČ"))
+  call assert_equal("dďđḋḏḑ", tolower("DĎĐḊḎḐ"))
+  call assert_equal("eèéêëēĕėęěẻẽ", tolower("EÈÉÊËĒĔĖĘĚẺẼ"))
+  call assert_equal("fḟ ", tolower("FḞ "))
+  call assert_equal("gĝğġģǥǧǵḡ", tolower("GĜĞĠĢǤǦǴḠ"))
+  call assert_equal("hĥħḣḧḩ", tolower("HĤĦḢḦḨ"))
+  call assert_equal("iìíîïĩīĭįiǐỉ", tolower("IÌÍÎÏĨĪĬĮİǏỈ"))
+  call assert_equal("jĵ", tolower("JĴ"))
+  call assert_equal("kķǩḱḵ", tolower("KĶǨḰḴ"))
+  call assert_equal("lĺļľŀłḻ", tolower("LĹĻĽĿŁḺ"))
+  call assert_equal("mḿṁ", tolower("MḾṀ"))
+  call assert_equal("nñńņňṅṉ", tolower("NÑŃŅŇṄṈ"))
+  call assert_equal("oòóôõöøōŏőơǒǫǭỏ", tolower("OÒÓÔÕÖØŌŎŐƠǑǪǬỎ"))
+  call assert_equal("pṕṗ", tolower("PṔṖ"))
+  call assert_equal("q", tolower("Q"))
+  call assert_equal("rŕŗřṙṟ", tolower("RŔŖŘṘṞ"))
+  call assert_equal("sśŝşšṡ", tolower("SŚŜŞŠṠ"))
+  call assert_equal("tţťŧṫṯ", tolower("TŢŤŦṪṮ"))
+  call assert_equal("uùúûüũūŭůűųưǔủ", tolower("UÙÚÛÜŨŪŬŮŰŲƯǓỦ"))
+  call assert_equal("vṽ", tolower("VṼ"))
+  call assert_equal("wŵẁẃẅẇ", tolower("WŴẀẂẄẆ"))
+  call assert_equal("xẋẍ", tolower("XẊẌ"))
+  call assert_equal("yýŷÿẏỳỷỹ", tolower("YÝŶŸẎỲỶỸ"))
+  call assert_equal("zźżžƶẑẕ", tolower("ZŹŻŽƵẐẔ"))
+
+  " Test with a few lowercase diacritics, which should remain unchanged.
+  call assert_equal("aàáâãäåāăąǎǟǡả", tolower("aàáâãäåāăąǎǟǡả"))
+  call assert_equal("bḃḇ", tolower("bḃḇ"))
+  call assert_equal("cçćĉċč", tolower("cçćĉċč"))
+  call assert_equal("dďđḋḏḑ", tolower("dďđḋḏḑ"))
+  call assert_equal("eèéêëēĕėęěẻẽ", tolower("eèéêëēĕėęěẻẽ"))
+  call assert_equal("fḟ", tolower("fḟ"))
+  call assert_equal("gĝğġģǥǧǵḡ", tolower("gĝğġģǥǧǵḡ"))
+  call assert_equal("hĥħḣḧḩẖ", tolower("hĥħḣḧḩẖ"))
+  call assert_equal("iìíîïĩīĭįǐỉ", tolower("iìíîïĩīĭįǐỉ"))
+  call assert_equal("jĵǰ", tolower("jĵǰ"))
+  call assert_equal("kķǩḱḵ", tolower("kķǩḱḵ"))
+  call assert_equal("lĺļľŀłḻ", tolower("lĺļľŀłḻ"))
+  call assert_equal("mḿṁ ", tolower("mḿṁ "))
+  call assert_equal("nñńņňŉṅṉ", tolower("nñńņňŉṅṉ"))
+  call assert_equal("oòóôõöøōŏőơǒǫǭỏ", tolower("oòóôõöøōŏőơǒǫǭỏ"))
+  call assert_equal("pṕṗ", tolower("pṕṗ"))
+  call assert_equal("q", tolower("q"))
+  call assert_equal("rŕŗřṙṟ", tolower("rŕŗřṙṟ"))
+  call assert_equal("sśŝşšṡ", tolower("sśŝşšṡ"))
+  call assert_equal("tţťŧṫṯẗ", tolower("tţťŧṫṯẗ"))
+  call assert_equal("uùúûüũūŭůűųưǔủ", tolower("uùúûüũūŭůűųưǔủ"))
+  call assert_equal("vṽ", tolower("vṽ"))
+  call assert_equal("wŵẁẃẅẇẘ", tolower("wŵẁẃẅẇẘ"))
+  call assert_equal("ẋẍ", tolower("ẋẍ"))
+  call assert_equal("yýÿŷẏẙỳỷỹ", tolower("yýÿŷẏẙỳỷỹ"))
+  call assert_equal("zźżžƶẑẕ", tolower("zźżžƶẑẕ"))
+
+  " According to https://twitter.com/jifa/status/625776454479970304
+  " Ⱥ (U+023A) and Ⱦ (U+023E) are the *only* code points to increase
+  " in length (2 to 3 bytes) when lowercased. So let's test them.
+  call assert_equal("ⱥ ⱦ", tolower("Ⱥ Ⱦ"))
+endfunc
+
+func Test_toupper()
+  call assert_equal("", toupper(""))
+
+  " Test with all printable ASCII characters.
+  call assert_equal(' !"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~',
+          \ toupper(' !"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'))
+
+  if !has('multi_byte')
+    return
+  endif
+
+  " Test with a few lowercase diacritics.
+  call assert_equal("AÀÁÂÃÄÅĀĂĄǍǞǠẢ", toupper("aàáâãäåāăąǎǟǡả"))
+  call assert_equal("BḂḆ", toupper("bḃḇ"))
+  call assert_equal("CÇĆĈĊČ", toupper("cçćĉċč"))
+  call assert_equal("DĎĐḊḎḐ", toupper("dďđḋḏḑ"))
+  call assert_equal("EÈÉÊËĒĔĖĘĚẺẼ", toupper("eèéêëēĕėęěẻẽ"))
+  call assert_equal("FḞ", toupper("fḟ"))
+  call assert_equal("GĜĞĠĢǤǦǴḠ", toupper("gĝğġģǥǧǵḡ"))
+  call assert_equal("HĤĦḢḦḨẖ", toupper("hĥħḣḧḩẖ"))
+  call assert_equal("IÌÍÎÏĨĪĬĮǏỈ", toupper("iìíîïĩīĭįǐỉ"))
+  call assert_equal("JĴǰ", toupper("jĵǰ"))
+  call assert_equal("KĶǨḰḴ", toupper("kķǩḱḵ"))
+  call assert_equal("LĹĻĽĿŁḺ", toupper("lĺļľŀłḻ"))
+  call assert_equal("MḾṀ ", toupper("mḿṁ "))
+  call assert_equal("NÑŃŅŇŉṄṈ", toupper("nñńņňŉṅṉ"))
+  call assert_equal("OÒÓÔÕÖØŌŎŐƠǑǪǬỎ", toupper("oòóôõöøōŏőơǒǫǭỏ"))
+  call assert_equal("PṔṖ", toupper("pṕṗ"))
+  call assert_equal("Q", toupper("q"))
+  call assert_equal("RŔŖŘṘṞ", toupper("rŕŗřṙṟ"))
+  call assert_equal("SŚŜŞŠṠ", toupper("sśŝşšṡ"))
+  call assert_equal("TŢŤŦṪṮẗ", toupper("tţťŧṫṯẗ"))
+  call assert_equal("UÙÚÛÜŨŪŬŮŰŲƯǓỦ", toupper("uùúûüũūŭůűųưǔủ"))
+  call assert_equal("VṼ", toupper("vṽ"))
+  call assert_equal("WŴẀẂẄẆẘ", toupper("wŵẁẃẅẇẘ"))
+  call assert_equal("ẊẌ", toupper("ẋẍ"))
+  call assert_equal("YÝŸŶẎẙỲỶỸ", toupper("yýÿŷẏẙỳỷỹ"))
+  call assert_equal("ZŹŻŽƵẐẔ", toupper("zźżžƶẑẕ"))
+
+  " Test with a few uppercase diacritics, which should remain unchanged.
+  call assert_equal("AÀÁÂÃÄÅĀĂĄǍǞǠẢ", toupper("AÀÁÂÃÄÅĀĂĄǍǞǠẢ"))
+  call assert_equal("BḂḆ", toupper("BḂḆ"))
+  call assert_equal("CÇĆĈĊČ", toupper("CÇĆĈĊČ"))
+  call assert_equal("DĎĐḊḎḐ", toupper("DĎĐḊḎḐ"))
+  call assert_equal("EÈÉÊËĒĔĖĘĚẺẼ", toupper("EÈÉÊËĒĔĖĘĚẺẼ"))
+  call assert_equal("FḞ ", toupper("FḞ "))
+  call assert_equal("GĜĞĠĢǤǦǴḠ", toupper("GĜĞĠĢǤǦǴḠ"))
+  call assert_equal("HĤĦḢḦḨ", toupper("HĤĦḢḦḨ"))
+  call assert_equal("IÌÍÎÏĨĪĬĮİǏỈ", toupper("IÌÍÎÏĨĪĬĮİǏỈ"))
+  call assert_equal("JĴ", toupper("JĴ"))
+  call assert_equal("KĶǨḰḴ", toupper("KĶǨḰḴ"))
+  call assert_equal("LĹĻĽĿŁḺ", toupper("LĹĻĽĿŁḺ"))
+  call assert_equal("MḾṀ", toupper("MḾṀ"))
+  call assert_equal("NÑŃŅŇṄṈ", toupper("NÑŃŅŇṄṈ"))
+  call assert_equal("OÒÓÔÕÖØŌŎŐƠǑǪǬỎ", toupper("OÒÓÔÕÖØŌŎŐƠǑǪǬỎ"))
+  call assert_equal("PṔṖ", toupper("PṔṖ"))
+  call assert_equal("Q", toupper("Q"))
+  call assert_equal("RŔŖŘṘṞ", toupper("RŔŖŘṘṞ"))
+  call assert_equal("SŚŜŞŠṠ", toupper("SŚŜŞŠṠ"))
+  call assert_equal("TŢŤŦṪṮ", toupper("TŢŤŦṪṮ"))
+  call assert_equal("UÙÚÛÜŨŪŬŮŰŲƯǓỦ", toupper("UÙÚÛÜŨŪŬŮŰŲƯǓỦ"))
+  call assert_equal("VṼ", toupper("VṼ"))
+  call assert_equal("WŴẀẂẄẆ", toupper("WŴẀẂẄẆ"))
+  call assert_equal("XẊẌ", toupper("XẊẌ"))
+  call assert_equal("YÝŶŸẎỲỶỸ", toupper("YÝŶŸẎỲỶỸ"))
+  call assert_equal("ZŹŻŽƵẐẔ", toupper("ZŹŻŽƵẐẔ"))
+
+  call assert_equal("Ⱥ Ⱦ", toupper("ⱥ ⱦ"))
+endfunc
+
+
diff --git a/src/nvim/testdir/test_normal.vim b/src/nvim/testdir/test_normal.vim
index a22dca35cc..c529971528 100644
--- a/src/nvim/testdir/test_normal.vim
+++ b/src/nvim/testdir/test_normal.vim
@@ -1606,6 +1606,40 @@ fun! Test_normal30_changecase()
   norm! V~
   call assert_equal('THIS IS A simple test: äüöss', getline('.'))
 
+  " Turkish ASCII turns to multi-byte.  On Mac the Turkish locale is available
+  " but toupper()/tolower() don't do the right thing.
+  if !has('mac') && !has('osx')
+    try
+      lang tr_TR.UTF-8
+      set casemap=
+      call setline(1, 'iI')
+      1normal gUU
+      call assert_equal("\u0130I", getline(1))
+      call assert_equal("\u0130I", toupper("iI"))
+
+      call setline(1, 'iI')
+      1normal guu
+      call assert_equal("i\u0131", getline(1))
+      call assert_equal("i\u0131", tolower("iI"))
+
+      set casemap&
+      call setline(1, 'iI')
+      1normal gUU
+      call assert_equal("II", getline(1))
+      call assert_equal("II", toupper("iI"))
+
+      call setline(1, 'iI')
+      1normal guu
+      call assert_equal("ii", getline(1))
+      call assert_equal("ii", tolower("iI"))
+
+      lang en_US.UTF-8
+    catch /E197:/
+      " can't use Turkish locale
+      throw 'Skipped: Turkish locale not available'
+    endtry
+  endif
+
   " clean up
   bw!
 endfunc
author	Justin M. Keyes <justinkz@gmail.com>	2017-04-10 15:23:44 +0200
committer	GitHub <noreply@github.com>	2017-04-10 15:23:44 +0200
commit	fec53f0bdf5b871c1f6ca818d5a5c52118f5c266 (patch)
tree	fe6f9dc4ba3788fe6fea8ac599974904861257c3 /src
parent	dd7f41e5a04c14255893e8b986e42e4c62902e1b (diff)
parent	c1cf03398143f4dc0ac9155988edad349d24deca (diff)
download	rneovim-fec53f0bdf5b871c1f6ca818d5a5c52118f5c266.tar.gz rneovim-fec53f0bdf5b871c1f6ca818d5a5c52118f5c266.tar.bz2 rneovim-fec53f0bdf5b871c1f6ca818d5a5c52118f5c266.zip