diff options
author | Justin M. Keyes <justinkz@gmail.com> | 2017-04-10 15:23:44 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-04-10 15:23:44 +0200 |
commit | fec53f0bdf5b871c1f6ca818d5a5c52118f5c266 (patch) | |
tree | fe6f9dc4ba3788fe6fea8ac599974904861257c3 /src | |
parent | dd7f41e5a04c14255893e8b986e42e4c62902e1b (diff) | |
parent | c1cf03398143f4dc0ac9155988edad349d24deca (diff) | |
download | rneovim-fec53f0bdf5b871c1f6ca818d5a5c52118f5c266.tar.gz rneovim-fec53f0bdf5b871c1f6ca818d5a5c52118f5c266.tar.bz2 rneovim-fec53f0bdf5b871c1f6ca818d5a5c52118f5c266.zip |
Merge #6479 from bfredl/tolower
remove vim_tolower/etc functions with broken locale-dependent behavior
Diffstat (limited to 'src')
-rw-r--r-- | src/nvim/charset.c | 69 | ||||
-rw-r--r-- | src/nvim/edit.c | 25 | ||||
-rw-r--r-- | src/nvim/eval.c | 28 | ||||
-rw-r--r-- | src/nvim/ex_getln.c | 4 | ||||
-rw-r--r-- | src/nvim/file_search.c | 2 | ||||
-rw-r--r-- | src/nvim/macros.h | 2 | ||||
-rw-r--r-- | src/nvim/mbyte.c | 31 | ||||
-rw-r--r-- | src/nvim/message.c | 6 | ||||
-rw-r--r-- | src/nvim/ops.c | 27 | ||||
-rw-r--r-- | src/nvim/path.c | 6 | ||||
-rw-r--r-- | src/nvim/regexp.c | 45 | ||||
-rw-r--r-- | src/nvim/regexp_nfa.c | 32 | ||||
-rw-r--r-- | src/nvim/search.c | 25 | ||||
-rw-r--r-- | src/nvim/spell.c | 44 | ||||
-rw-r--r-- | src/nvim/spell_defs.h | 4 | ||||
-rw-r--r-- | src/nvim/strings.c | 51 | ||||
-rw-r--r-- | src/nvim/testdir/test_functions.vim | 144 | ||||
-rw-r--r-- | src/nvim/testdir/test_normal.vim | 34 |
18 files changed, 332 insertions, 247 deletions
diff --git a/src/nvim/charset.c b/src/nvim/charset.c index 99d3e2dd88..645139f696 100644 --- a/src/nvim/charset.c +++ b/src/nvim/charset.c @@ -212,8 +212,8 @@ int buf_init_chartab(buf_T *buf, int global) // work properly when 'encoding' is "latin1" and the locale is // "C". if (!do_isalpha - || vim_islower(c) - || vim_isupper(c) + || mb_islower(c) + || mb_isupper(c) || (p_altkeymap && (F_isalpha(c) || F_isdigit(c)))) { if (i == 0) { // (re)set ID flag @@ -417,11 +417,11 @@ char_u* str_foldcase(char_u *str, int orglen, char_u *buf, int buflen) while (STR_CHAR(i) != NUL) { int c = utf_ptr2char(STR_PTR(i)); int olen = utf_ptr2len(STR_PTR(i)); - int lc = utf_tolower(c); + int lc = mb_tolower(c); // Only replace the character when it is not an invalid // sequence (ASCII character or more than one byte) and - // utf_tolower() doesn't return the original character. + // mb_tolower() doesn't return the original character. if (((c < 0x80) || (olen > 1)) && (c != lc)) { int nlen = utf_char2len(lc); @@ -1506,67 +1506,6 @@ char_u* skiptohex(char_u *q) return p; } -// Vim's own character class functions. These exist because many library -// islower()/toupper() etc. do not work properly: they crash when used with -// invalid values or can't handle latin1 when the locale is C. -// Speed is most important here. - -/// Check that the character is lower-case -/// -/// @param c character to check -bool vim_islower(int c) - FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT -{ - if (c <= '@') { - return false; - } - - if (c >= 0x80) { - return utf_islower(c); - } - return islower(c); -} - -/// Check that the character is upper-case -/// -/// @param c character to check -bool vim_isupper(int c) - FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT -{ - if (c <= '@') { - return false; - } - - if (c >= 0x80) { - return utf_isupper(c); - } - return isupper(c); -} - -int vim_toupper(int c) -{ - if (c <= '@') { - return c; - } - - if (c >= 0x80) { - return utf_toupper(c); - } - return TOUPPER_LOC(c); -} - -int vim_tolower(int c) -{ - if (c <= '@') { - return c; - } - - if (c >= 0x80) { - return utf_tolower(c); - } - return TOLOWER_LOC(c); -} - /// Skip over text until ' ' or '\t' or NUL /// /// @param[in] p Text to skip over. diff --git a/src/nvim/edit.c b/src/nvim/edit.c index b35504908e..fe00027dec 100644 --- a/src/nvim/edit.c +++ b/src/nvim/edit.c @@ -2037,12 +2037,12 @@ int ins_compl_add_infercase(char_u *str, int len, int icase, char_u *fname, int } else { c = *(p++); } - if (vim_islower(c)) { + if (mb_islower(c)) { has_lower = true; - if (vim_isupper(wca[i])) { + if (mb_isupper(wca[i])) { // Rule 1 is satisfied. for (i = actual_compl_length; i < actual_len; i++) { - wca[i] = vim_tolower(wca[i]); + wca[i] = mb_tolower(wca[i]); } break; } @@ -2062,14 +2062,14 @@ int ins_compl_add_infercase(char_u *str, int len, int icase, char_u *fname, int } else { c = *(p++); } - if (was_letter && vim_isupper(c) && vim_islower(wca[i])) { + if (was_letter && mb_isupper(c) && mb_islower(wca[i])) { // Rule 2 is satisfied. for (i = actual_compl_length; i < actual_len; i++) { - wca[i] = vim_toupper(wca[i]); + wca[i] = mb_toupper(wca[i]); } break; } - was_letter = vim_islower(c) || vim_isupper(c); + was_letter = mb_islower(c) || mb_isupper(c); } } @@ -2082,10 +2082,10 @@ int ins_compl_add_infercase(char_u *str, int len, int icase, char_u *fname, int } else { c = *(p++); } - if (vim_islower(c)) { - wca[i] = vim_tolower(wca[i]); - } else if (vim_isupper(c)) { - wca[i] = vim_toupper(wca[i]); + if (mb_islower(c)) { + wca[i] = mb_tolower(wca[i]); + } else if (mb_isupper(c)) { + wca[i] = mb_toupper(wca[i]); } } } @@ -2302,9 +2302,10 @@ static void ins_compl_longest_match(compl_T *match) c1 = *p; c2 = *s; } - if (match->cp_icase ? (vim_tolower(c1) != vim_tolower(c2)) - : (c1 != c2)) + if (match->cp_icase ? (mb_tolower(c1) != mb_tolower(c2)) + : (c1 != c2)) { break; + } if (has_mbyte) { mb_ptr_adv(p); mb_ptr_adv(s); diff --git a/src/nvim/eval.c b/src/nvim/eval.c index 124d6acfe9..0663e19b9a 100644 --- a/src/nvim/eval.c +++ b/src/nvim/eval.c @@ -16791,30 +16791,9 @@ void timer_teardown(void) */ static void f_tolower(typval_T *argvars, typval_T *rettv, FunPtr fptr) { - char_u *p = (char_u *)xstrdup(tv_get_string(&argvars[0])); rettv->v_type = VAR_STRING; - rettv->vval.v_string = p; - - while (*p != NUL) { - int l; - - if (enc_utf8) { - int c, lc; - - c = utf_ptr2char(p); - lc = utf_tolower(c); - l = utf_ptr2len(p); - /* TODO: reallocate string when byte count changes. */ - if (utf_char2len(lc) == l) - utf_char2bytes(lc, p); - p += l; - } else if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1) - p += l; /* skip multi-byte character */ - else { - *p = TOLOWER_LOC(*p); /* note that tolower() can be a macro */ - ++p; - } - } + rettv->vval.v_string = (char_u *)strcase_save(tv_get_string(&argvars[0]), + false); } /* @@ -16823,7 +16802,8 @@ static void f_tolower(typval_T *argvars, typval_T *rettv, FunPtr fptr) static void f_toupper(typval_T *argvars, typval_T *rettv, FunPtr fptr) { rettv->v_type = VAR_STRING; - rettv->vval.v_string = (char_u *)strup_save(tv_get_string(&argvars[0])); + rettv->vval.v_string = (char_u *)strcase_save(tv_get_string(&argvars[0]), + true); } /* diff --git a/src/nvim/ex_getln.c b/src/nvim/ex_getln.c index 8810204c03..9d74f554ba 100644 --- a/src/nvim/ex_getln.c +++ b/src/nvim/ex_getln.c @@ -1231,7 +1231,7 @@ static int command_line_handle_key(CommandLineState *s) // command line has no uppercase characters, convert // the character to lowercase if (p_ic && p_scs && !pat_has_uppercase(ccline.cmdbuff)) { - s->c = vim_tolower(s->c); + s->c = mb_tolower(s->c); } if (s->c != NUL) { @@ -3018,7 +3018,7 @@ ExpandOne ( || xp->xp_context == EXPAND_FILES || xp->xp_context == EXPAND_SHELLCMD || xp->xp_context == EXPAND_BUFFERS)) { - if (vim_tolower(c0) != vim_tolower(ci)) { + if (mb_tolower(c0) != mb_tolower(ci)) { break; } } else if (c0 != ci) { diff --git a/src/nvim/file_search.c b/src/nvim/file_search.c index 9592235905..db745bdd15 100644 --- a/src/nvim/file_search.c +++ b/src/nvim/file_search.c @@ -1057,7 +1057,7 @@ static bool ff_wc_equal(char_u *s1, char_u *s2) c1 = PTR2CHAR(s1 + i); c2 = PTR2CHAR(s2 + j); - if ((p_fic ? vim_tolower(c1) != vim_tolower(c2) : c1 != c2) + if ((p_fic ? mb_tolower(c1) != mb_tolower(c2) : c1 != c2) && (prev1 != '*' || prev2 != '*')) { return false; } diff --git a/src/nvim/macros.h b/src/nvim/macros.h index a8df6322cf..22fd48de9d 100644 --- a/src/nvim/macros.h +++ b/src/nvim/macros.h @@ -62,7 +62,7 @@ * toupper() and tolower() that use the current locale. * Careful: Only call TOUPPER_LOC() and TOLOWER_LOC() with a character in the * range 0 - 255. toupper()/tolower() on some systems can't handle others. - * Note: It is often better to use vim_tolower() and vim_toupper(), because many + * Note: It is often better to use mb_tolower() and mb_toupper(), because many * toupper() and tolower() implementations only work for ASCII. */ #define TOUPPER_LOC toupper diff --git a/src/nvim/mbyte.c b/src/nvim/mbyte.c index 460528b85f..b18459a2b5 100644 --- a/src/nvim/mbyte.c +++ b/src/nvim/mbyte.c @@ -1174,11 +1174,14 @@ int utf_fold(int a) return utf_convert(a, foldCase, ARRAY_SIZE(foldCase)); } -/* - * Return the upper-case equivalent of "a", which is a UCS-4 character. Use - * simple case folding. - */ -int utf_toupper(int a) +// Vim's own character class functions. These exist because many library +// islower()/toupper() etc. do not work properly: they crash when used with +// invalid values or can't handle latin1 when the locale is C. +// Speed is most important here. + +/// Return the upper-case equivalent of "a", which is a UCS-4 character. Use +/// simple case folding. +int mb_toupper(int a) { /* If 'casemap' contains "keepascii" use ASCII style toupper(). */ if (a < 128 && (cmp_flags & CMP_KEEPASCII)) @@ -1198,17 +1201,15 @@ int utf_toupper(int a) return utf_convert(a, toUpper, ARRAY_SIZE(toUpper)); } -bool utf_islower(int a) +bool mb_islower(int a) { - /* German sharp s is lower case but has no upper case equivalent. */ - return (utf_toupper(a) != a) || a == 0xdf; + // German sharp s is lower case but has no upper case equivalent. + return (mb_toupper(a) != a) || a == 0xdf; } -/* - * Return the lower-case equivalent of "a", which is a UCS-4 character. Use - * simple case folding. - */ -int utf_tolower(int a) +/// Return the lower-case equivalent of "a", which is a UCS-4 character. Use +/// simple case folding. +int mb_tolower(int a) { /* If 'casemap' contains "keepascii" use ASCII style tolower(). */ if (a < 128 && (cmp_flags & CMP_KEEPASCII)) @@ -1228,9 +1229,9 @@ int utf_tolower(int a) return utf_convert(a, toLower, ARRAY_SIZE(toLower)); } -bool utf_isupper(int a) +bool mb_isupper(int a) { - return utf_tolower(a) != a; + return mb_tolower(a) != a; } static int utf_strnicmp(const char_u *s1, const char_u *s2, size_t n1, diff --git a/src/nvim/message.c b/src/nvim/message.c index 1d3609291a..3e4a1e10b6 100644 --- a/src/nvim/message.c +++ b/src/nvim/message.c @@ -2730,8 +2730,8 @@ do_dialog ( break; } - /* Make the character lowercase, as chars in "hotkeys" are. */ - c = vim_tolower(c); + // Make the character lowercase, as chars in "hotkeys" are. + c = mb_tolower(c); retval = 1; for (i = 0; hotkeys[i]; ++i) { if (has_mbyte) { @@ -2777,7 +2777,7 @@ copy_char ( if (has_mbyte) { if (lowercase) { - c = vim_tolower((*mb_ptr2char)(from)); + c = mb_tolower((*mb_ptr2char)(from)); return (*mb_char2bytes)(c, to); } else { len = (*mb_ptr2len)(from); diff --git a/src/nvim/ops.c b/src/nvim/ops.c index 68ef27222c..f11d6b69b2 100644 --- a/src/nvim/ops.c +++ b/src/nvim/ops.c @@ -1956,16 +1956,18 @@ int swapchar(int op_type, pos_T *pos) if (enc_dbcs != 0 && c >= 0x100) /* No lower/uppercase letter */ return FALSE; nc = c; - if (vim_islower(c)) { - if (op_type == OP_ROT13) + if (mb_islower(c)) { + if (op_type == OP_ROT13) { nc = ROT13(c, 'a'); - else if (op_type != OP_LOWER) - nc = vim_toupper(c); - } else if (vim_isupper(c)) { - if (op_type == OP_ROT13) + } else if (op_type != OP_LOWER) { + nc = mb_toupper(c); + } + } else if (mb_isupper(c)) { + if (op_type == OP_ROT13) { nc = ROT13(c, 'A'); - else if (op_type != OP_UPPER) - nc = vim_tolower(c); + } else if (op_type != OP_UPPER) { + nc = mb_tolower(c); + } } if (nc != c) { if (enc_utf8 && (c >= 0x80 || nc >= 0x80)) { @@ -3327,10 +3329,11 @@ void ex_display(exarg_T *eap) get_clipboard(name, &yb, true); - if (name == vim_tolower(redir_reg) - || (redir_reg == '"' && yb == y_previous)) - continue; /* do not list register being written to, the - * pointer can be freed */ + if (name == mb_tolower(redir_reg) + || (redir_reg == '"' && yb == y_previous)) { + continue; // do not list register being written to, the + // pointer can be freed + } if (yb->y_array != NULL) { msg_putchar('\n'); diff --git a/src/nvim/path.c b/src/nvim/path.c index 6bf42ed2fa..205fc2ed62 100644 --- a/src/nvim/path.c +++ b/src/nvim/path.c @@ -1853,7 +1853,7 @@ int pathcmp(const char *p, const char *q, int maxlen) break; } - if ((p_fic ? vim_toupper(c1) != vim_toupper(c2) : c1 != c2) + if ((p_fic ? mb_toupper(c1) != mb_toupper(c2) : c1 != c2) #ifdef BACKSLASH_IN_FILENAME /* consider '/' and '\\' to be equal */ && !((c1 == '/' && c2 == '\\') @@ -1864,8 +1864,8 @@ int pathcmp(const char *p, const char *q, int maxlen) return -1; if (vim_ispathsep(c2)) return 1; - return p_fic ? vim_toupper(c1) - vim_toupper(c2) - : c1 - c2; /* no match */ + return p_fic ? mb_toupper(c1) - mb_toupper(c2) + : c1 - c2; // no match } i += MB_PTR2LEN((char_u *)p + i); diff --git a/src/nvim/regexp.c b/src/nvim/regexp.c index 9baa53d2a2..4b5e17b00b 100644 --- a/src/nvim/regexp.c +++ b/src/nvim/regexp.c @@ -2350,7 +2350,7 @@ collection: break; case CLASS_LOWER: for (cu = 1; cu <= 255; cu++) { - if (vim_islower(cu) && cu != 170 && cu != 186) { + if (mb_islower(cu) && cu != 170 && cu != 186) { regmbc(cu); } } @@ -2376,7 +2376,7 @@ collection: break; case CLASS_UPPER: for (cu = 1; cu <= 255; cu++) { - if (vim_isupper(cu)) { + if (mb_isupper(cu)) { regmbc(cu); } } @@ -3474,7 +3474,7 @@ static long bt_regexec_both(char_u *line, || (ireg_ic && (((enc_utf8 && utf_fold(prog->regstart) == utf_fold(c))) || (c < 255 && prog->regstart < 255 - && vim_tolower(prog->regstart) == vim_tolower(c))))) { + && mb_tolower(prog->regstart) == mb_tolower(c))))) { retval = regtry(prog, col); } else { retval = 0; @@ -4155,7 +4155,7 @@ regmatch ( if (*opnd != *reginput && (!ireg_ic || (!enc_utf8 - && vim_tolower(*opnd) != vim_tolower(*reginput)))) { + && mb_tolower(*opnd) != mb_tolower(*reginput)))) { status = RA_NOMATCH; } else if (*opnd == NUL) { // match empty string always works; happens when "~" is @@ -4573,12 +4573,14 @@ regmatch ( if (OP(next) == EXACTLY) { rst.nextb = *OPERAND(next); if (ireg_ic) { - if (vim_isupper(rst.nextb)) - rst.nextb_ic = vim_tolower(rst.nextb); - else - rst.nextb_ic = vim_toupper(rst.nextb); - } else + if (mb_isupper(rst.nextb)) { + rst.nextb_ic = mb_tolower(rst.nextb); + } else { + rst.nextb_ic = mb_toupper(rst.nextb); + } + } else { rst.nextb_ic = rst.nextb; + } } else { rst.nextb = NUL; rst.nextb_ic = NUL; @@ -5339,8 +5341,8 @@ do_class: * would have been used for it. It does handle single-byte * characters, such as latin1. */ if (ireg_ic) { - cu = vim_toupper(*opnd); - cl = vim_tolower(*opnd); + cu = mb_toupper(*opnd); + cl = mb_tolower(*opnd); while (count < maxcount && (*scan == cu || *scan == cl)) { count++; scan++; @@ -6312,14 +6314,15 @@ static char_u *cstrchr(char_u *s, int c) /* tolower() and toupper() can be slow, comparing twice should be a lot * faster (esp. when using MS Visual C++!). * For UTF-8 need to use folded case. */ - if (enc_utf8 && c > 0x80) + if (c > 0x80) { cc = utf_fold(c); - else if (vim_isupper(c)) - cc = vim_tolower(c); - else if (vim_islower(c)) - cc = vim_toupper(c); - else + } else if (mb_isupper(c)) { + cc = mb_tolower(c); + } else if (mb_islower(c)) { + cc = mb_toupper(c); + } else { return vim_strchr(s, c); + } if (has_mbyte) { for (p = s; *p != NUL; p += (*mb_ptr2len)(p)) { @@ -6348,28 +6351,28 @@ static char_u *cstrchr(char_u *s, int c) static fptr_T do_upper(int *d, int c) { - *d = vim_toupper(c); + *d = mb_toupper(c); return (fptr_T)NULL; } static fptr_T do_Upper(int *d, int c) { - *d = vim_toupper(c); + *d = mb_toupper(c); return (fptr_T)do_Upper; } static fptr_T do_lower(int *d, int c) { - *d = vim_tolower(c); + *d = mb_tolower(c); return (fptr_T)NULL; } static fptr_T do_Lower(int *d, int c) { - *d = vim_tolower(c); + *d = mb_tolower(c); return (fptr_T)do_Lower; } diff --git a/src/nvim/regexp_nfa.c b/src/nvim/regexp_nfa.c index 5b49ab38f0..caf26fdd35 100644 --- a/src/nvim/regexp_nfa.c +++ b/src/nvim/regexp_nfa.c @@ -4373,7 +4373,7 @@ static int check_char_class(int class, int c) return OK; break; case NFA_CLASS_LOWER: - if (vim_islower(c) && c != 170 && c != 186) { + if (mb_islower(c) && c != 170 && c != 186) { return OK; } break; @@ -4391,8 +4391,9 @@ static int check_char_class(int class, int c) return OK; break; case NFA_CLASS_UPPER: - if (vim_isupper(c)) + if (mb_isupper(c)) { return OK; + } break; case NFA_CLASS_XDIGIT: if (ascii_isxdigit(c)) @@ -4892,7 +4893,7 @@ static long find_match_text(colnr_T startcol, int regstart, char_u *match_text) int c2_len = PTR2LEN(s2); int c2 = PTR2CHAR(s2); - if ((c1 != c2 && (!ireg_ic || vim_tolower(c1) != vim_tolower(c2))) + if ((c1 != c2 && (!ireg_ic || mb_tolower(c1) != mb_tolower(c2))) || c1_len != c2_len) { match = false; break; @@ -5585,22 +5586,24 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, break; } if (ireg_ic) { - int curc_low = vim_tolower(curc); - int done = FALSE; + int curc_low = mb_tolower(curc); + int done = false; - for (; c1 <= c2; ++c1) - if (vim_tolower(c1) == curc_low) { + for (; c1 <= c2; c1++) { + if (mb_tolower(c1) == curc_low) { result = result_if_matched; done = TRUE; break; } - if (done) + } + if (done) { break; + } } } else if (state->c < 0 ? check_char_class(state->c, curc) : (curc == state->c - || (ireg_ic && vim_tolower(curc) - == vim_tolower(state->c)))) { + || (ireg_ic && mb_tolower(curc) + == mb_tolower(state->c)))) { result = result_if_matched; break; } @@ -6003,8 +6006,9 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, #endif result = (c == curc); - if (!result && ireg_ic) - result = vim_tolower(c) == vim_tolower(curc); + if (!result && ireg_ic) { + result = mb_tolower(c) == mb_tolower(curc); + } // If ireg_icombine is not set only skip over the character // itself. When it is set skip over composing characters. @@ -6152,8 +6156,8 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, // Checking if the required start character matches is // cheaper than adding a state that won't match. c = PTR2CHAR(reginput + clen); - if (c != prog->regstart && (!ireg_ic || vim_tolower(c) - != vim_tolower(prog->regstart))) { + if (c != prog->regstart && (!ireg_ic || mb_tolower(c) + != mb_tolower(prog->regstart))) { #ifdef REGEXP_DEBUG fprintf(log_fd, " Skipping start state, regstart does not match\n"); diff --git a/src/nvim/search.c b/src/nvim/search.c index c5c92b41c5..91a558045f 100644 --- a/src/nvim/search.c +++ b/src/nvim/search.c @@ -335,23 +335,26 @@ int pat_has_uppercase(char_u *pat) while (*p != NUL) { int l; - if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1) { - if (enc_utf8 && utf_isupper(utf_ptr2char(p))) - return TRUE; + if ((l = mb_ptr2len(p)) > 1) { + if (mb_isupper(utf_ptr2char(p))) { + return true; + } p += l; } else if (*p == '\\') { - if (p[1] == '_' && p[2] != NUL) /* skip "\_X" */ + if (p[1] == '_' && p[2] != NUL) { // skip "\_X" p += 3; - else if (p[1] == '%' && p[2] != NUL) /* skip "\%X" */ + } else if (p[1] == '%' && p[2] != NUL) { // skip "\%X" p += 3; - else if (p[1] != NUL) /* skip "\X" */ + } else if (p[1] != NUL) { // skip "\X" p += 2; - else + } else { p += 1; - } else if (vim_isupper(*p)) - return TRUE; - else - ++p; + } + } else if (mb_isupper(*p)) { + return true; + } else { + p++; + } } return FALSE; } diff --git a/src/nvim/spell.c b/src/nvim/spell.c index d4f49bffb2..18febda1d8 100644 --- a/src/nvim/spell.c +++ b/src/nvim/spell.c @@ -2526,8 +2526,7 @@ void clear_spell_chartab(spelltab_T *sp) } } -// Init the chartab used for spelling. Only depends on 'encoding'. -// Called once while starting up and when 'encoding' changes. +// Init the chartab used for spelling. Called once while starting up. // The default is to use isalpha(), but the spell file should define the word // characters to make it possible that 'encoding' differs from the current // locale. For utf-8 we don't use isalpha() but our own functions. @@ -2537,36 +2536,17 @@ void init_spell_chartab(void) did_set_spelltab = false; clear_spell_chartab(&spelltab); - if (enc_dbcs) { - // DBCS: assume double-wide characters are word characters. - for (i = 128; i <= 255; ++i) - if (MB_BYTE2LEN(i) == 2) - spelltab.st_isw[i] = true; - } else if (enc_utf8) { - for (i = 128; i < 256; ++i) { - int f = utf_fold(i); - int u = utf_toupper(i); - - spelltab.st_isu[i] = utf_isupper(i); - spelltab.st_isw[i] = spelltab.st_isu[i] || utf_islower(i); - // The folded/upper-cased value is different between latin1 and - // utf8 for 0xb5, causing E763 for no good reason. Use the latin1 - // value for utf-8 to avoid this. - spelltab.st_fold[i] = (f < 256) ? f : i; - spelltab.st_upper[i] = (u < 256) ? u : i; - } - } else { - // Rough guess: use locale-dependent library functions. - for (i = 128; i < 256; ++i) { - if (vim_isupper(i)) { - spelltab.st_isw[i] = true; - spelltab.st_isu[i] = true; - spelltab.st_fold[i] = vim_tolower(i); - } else if (vim_islower(i)) { - spelltab.st_isw[i] = true; - spelltab.st_upper[i] = vim_toupper(i); - } - } + for (i = 128; i < 256; i++) { + int f = utf_fold(i); + int u = mb_toupper(i); + + spelltab.st_isu[i] = mb_isupper(i); + spelltab.st_isw[i] = spelltab.st_isu[i] || mb_islower(i); + // The folded/upper-cased value is different between latin1 and + // utf8 for 0xb5, causing E763 for no good reason. Use the latin1 + // value for utf-8 to avoid this. + spelltab.st_fold[i] = (f < 256) ? f : i; + spelltab.st_upper[i] = (u < 256) ? u : i; } } diff --git a/src/nvim/spell_defs.h b/src/nvim/spell_defs.h index c54a7f5390..ddd54c724e 100644 --- a/src/nvim/spell_defs.h +++ b/src/nvim/spell_defs.h @@ -265,11 +265,11 @@ typedef struct trystate_S { : (c) < \ 256 ? (int)spelltab.st_fold[c] : (int)towlower(c)) -#define SPELL_TOUPPER(c) (enc_utf8 && (c) >= 128 ? utf_toupper(c) \ +#define SPELL_TOUPPER(c) (enc_utf8 && (c) >= 128 ? mb_toupper(c) \ : (c) < \ 256 ? (int)spelltab.st_upper[c] : (int)towupper(c)) -#define SPELL_ISUPPER(c) (enc_utf8 && (c) >= 128 ? utf_isupper(c) \ +#define SPELL_ISUPPER(c) (enc_utf8 && (c) >= 128 ? mb_isupper(c) \ : (c) < 256 ? spelltab.st_isu[c] : iswupper(c)) // First language that is loaded, start of the linked list of loaded diff --git a/src/nvim/strings.c b/src/nvim/strings.c index 5dcffe00e0..87e066d80a 100644 --- a/src/nvim/strings.c +++ b/src/nvim/strings.c @@ -291,14 +291,15 @@ void vim_strup(char_u *p) } } -/// Make given string all upper-case +/// Make given string all upper-case or all lower-case /// -/// Handels multi-byte characters as good as possible. +/// Handles multi-byte characters as good as possible. /// /// @param[in] orig Input string. +/// @param[in] upper If true make uppercase, otherwise lowercase /// /// @return [allocated] upper-cased string. -char *strup_save(const char *const orig) +char *strcase_save(const char *const orig, bool upper) FUNC_ATTR_NONNULL_RET FUNC_ATTR_MALLOC FUNC_ATTR_NONNULL_ALL { char *res = xstrdup(orig); @@ -307,33 +308,25 @@ char *strup_save(const char *const orig) while (*p != NUL) { int l; - if (enc_utf8) { - int c = utf_ptr2char((const char_u *)p); - int uc = utf_toupper(c); - - // Reallocate string when byte count changes. This is rare, - // thus it's OK to do another malloc()/free(). - l = utf_ptr2len((const char_u *)p); - int newl = utf_char2len(uc); - if (newl != l) { - // TODO(philix): use xrealloc() in strup_save() - char *s = xmalloc(STRLEN(res) + (size_t)(1 + newl - l)); - memcpy(s, res, (size_t)(p - res)); - STRCPY(s + (p - res) + newl, p + l); - p = s + (p - res); - xfree(res); - res = s; - } - - utf_char2bytes(uc, (char_u *)p); - p += newl; - } else if (has_mbyte && (l = (*mb_ptr2len)((const char_u *)p)) > 1) { - p += l; // Skip multi-byte character. - } else { - // note that toupper() can be a macro - *p = (char)(uint8_t)TOUPPER_LOC(*p); - p++; + int c = utf_ptr2char((const char_u *)p); + int uc = upper ? mb_toupper(c) : mb_tolower(c); + + // Reallocate string when byte count changes. This is rare, + // thus it's OK to do another malloc()/free(). + l = utf_ptr2len((const char_u *)p); + int newl = utf_char2len(uc); + if (newl != l) { + // TODO(philix): use xrealloc() in strup_save() + char *s = xmalloc(STRLEN(res) + (size_t)(1 + newl - l)); + memcpy(s, res, (size_t)(p - res)); + STRCPY(s + (p - res) + newl, p + l); + p = s + (p - res); + xfree(res); + res = s; } + + utf_char2bytes(uc, (char_u *)p); + p += newl; } return res; diff --git a/src/nvim/testdir/test_functions.vim b/src/nvim/testdir/test_functions.vim index 81cb6314ce..3c258299c1 100644 --- a/src/nvim/testdir/test_functions.vim +++ b/src/nvim/testdir/test_functions.vim @@ -29,3 +29,147 @@ func Test_setbufvar_options() bwipe! endfunc +func Test_tolower() + call assert_equal("", tolower("")) + + " Test with all printable ASCII characters. + call assert_equal(' !"#$%&''()*+,-./0123456789:;<=>?@abcdefghijklmnopqrstuvwxyz[\]^_`abcdefghijklmnopqrstuvwxyz{|}~', + \ tolower(' !"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~')) + + if !has('multi_byte') + return + endif + + " Test with a few uppercase diacritics. + call assert_equal("aàáâãäåāăąǎǟǡả", tolower("AÀÁÂÃÄÅĀĂĄǍǞǠẢ")) + call assert_equal("bḃḇ", tolower("BḂḆ")) + call assert_equal("cçćĉċč", tolower("CÇĆĈĊČ")) + call assert_equal("dďđḋḏḑ", tolower("DĎĐḊḎḐ")) + call assert_equal("eèéêëēĕėęěẻẽ", tolower("EÈÉÊËĒĔĖĘĚẺẼ")) + call assert_equal("fḟ ", tolower("FḞ ")) + call assert_equal("gĝğġģǥǧǵḡ", tolower("GĜĞĠĢǤǦǴḠ")) + call assert_equal("hĥħḣḧḩ", tolower("HĤĦḢḦḨ")) + call assert_equal("iìíîïĩīĭįiǐỉ", tolower("IÌÍÎÏĨĪĬĮİǏỈ")) + call assert_equal("jĵ", tolower("JĴ")) + call assert_equal("kķǩḱḵ", tolower("KĶǨḰḴ")) + call assert_equal("lĺļľŀłḻ", tolower("LĹĻĽĿŁḺ")) + call assert_equal("mḿṁ", tolower("MḾṀ")) + call assert_equal("nñńņňṅṉ", tolower("NÑŃŅŇṄṈ")) + call assert_equal("oòóôõöøōŏőơǒǫǭỏ", tolower("OÒÓÔÕÖØŌŎŐƠǑǪǬỎ")) + call assert_equal("pṕṗ", tolower("PṔṖ")) + call assert_equal("q", tolower("Q")) + call assert_equal("rŕŗřṙṟ", tolower("RŔŖŘṘṞ")) + call assert_equal("sśŝşšṡ", tolower("SŚŜŞŠṠ")) + call assert_equal("tţťŧṫṯ", tolower("TŢŤŦṪṮ")) + call assert_equal("uùúûüũūŭůűųưǔủ", tolower("UÙÚÛÜŨŪŬŮŰŲƯǓỦ")) + call assert_equal("vṽ", tolower("VṼ")) + call assert_equal("wŵẁẃẅẇ", tolower("WŴẀẂẄẆ")) + call assert_equal("xẋẍ", tolower("XẊẌ")) + call assert_equal("yýŷÿẏỳỷỹ", tolower("YÝŶŸẎỲỶỸ")) + call assert_equal("zźżžƶẑẕ", tolower("ZŹŻŽƵẐẔ")) + + " Test with a few lowercase diacritics, which should remain unchanged. + call assert_equal("aàáâãäåāăąǎǟǡả", tolower("aàáâãäåāăąǎǟǡả")) + call assert_equal("bḃḇ", tolower("bḃḇ")) + call assert_equal("cçćĉċč", tolower("cçćĉċč")) + call assert_equal("dďđḋḏḑ", tolower("dďđḋḏḑ")) + call assert_equal("eèéêëēĕėęěẻẽ", tolower("eèéêëēĕėęěẻẽ")) + call assert_equal("fḟ", tolower("fḟ")) + call assert_equal("gĝğġģǥǧǵḡ", tolower("gĝğġģǥǧǵḡ")) + call assert_equal("hĥħḣḧḩẖ", tolower("hĥħḣḧḩẖ")) + call assert_equal("iìíîïĩīĭįǐỉ", tolower("iìíîïĩīĭįǐỉ")) + call assert_equal("jĵǰ", tolower("jĵǰ")) + call assert_equal("kķǩḱḵ", tolower("kķǩḱḵ")) + call assert_equal("lĺļľŀłḻ", tolower("lĺļľŀłḻ")) + call assert_equal("mḿṁ ", tolower("mḿṁ ")) + call assert_equal("nñńņňʼnṅṉ", tolower("nñńņňʼnṅṉ")) + call assert_equal("oòóôõöøōŏőơǒǫǭỏ", tolower("oòóôõöøōŏőơǒǫǭỏ")) + call assert_equal("pṕṗ", tolower("pṕṗ")) + call assert_equal("q", tolower("q")) + call assert_equal("rŕŗřṙṟ", tolower("rŕŗřṙṟ")) + call assert_equal("sśŝşšṡ", tolower("sśŝşšṡ")) + call assert_equal("tţťŧṫṯẗ", tolower("tţťŧṫṯẗ")) + call assert_equal("uùúûüũūŭůűųưǔủ", tolower("uùúûüũūŭůűųưǔủ")) + call assert_equal("vṽ", tolower("vṽ")) + call assert_equal("wŵẁẃẅẇẘ", tolower("wŵẁẃẅẇẘ")) + call assert_equal("ẋẍ", tolower("ẋẍ")) + call assert_equal("yýÿŷẏẙỳỷỹ", tolower("yýÿŷẏẙỳỷỹ")) + call assert_equal("zźżžƶẑẕ", tolower("zźżžƶẑẕ")) + + " According to https://twitter.com/jifa/status/625776454479970304 + " Ⱥ (U+023A) and Ⱦ (U+023E) are the *only* code points to increase + " in length (2 to 3 bytes) when lowercased. So let's test them. + call assert_equal("ⱥ ⱦ", tolower("Ⱥ Ⱦ")) +endfunc + +func Test_toupper() + call assert_equal("", toupper("")) + + " Test with all printable ASCII characters. + call assert_equal(' !"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~', + \ toupper(' !"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~')) + + if !has('multi_byte') + return + endif + + " Test with a few lowercase diacritics. + call assert_equal("AÀÁÂÃÄÅĀĂĄǍǞǠẢ", toupper("aàáâãäåāăąǎǟǡả")) + call assert_equal("BḂḆ", toupper("bḃḇ")) + call assert_equal("CÇĆĈĊČ", toupper("cçćĉċč")) + call assert_equal("DĎĐḊḎḐ", toupper("dďđḋḏḑ")) + call assert_equal("EÈÉÊËĒĔĖĘĚẺẼ", toupper("eèéêëēĕėęěẻẽ")) + call assert_equal("FḞ", toupper("fḟ")) + call assert_equal("GĜĞĠĢǤǦǴḠ", toupper("gĝğġģǥǧǵḡ")) + call assert_equal("HĤĦḢḦḨẖ", toupper("hĥħḣḧḩẖ")) + call assert_equal("IÌÍÎÏĨĪĬĮǏỈ", toupper("iìíîïĩīĭįǐỉ")) + call assert_equal("JĴǰ", toupper("jĵǰ")) + call assert_equal("KĶǨḰḴ", toupper("kķǩḱḵ")) + call assert_equal("LĹĻĽĿŁḺ", toupper("lĺļľŀłḻ")) + call assert_equal("MḾṀ ", toupper("mḿṁ ")) + call assert_equal("NÑŃŅŇʼnṄṈ", toupper("nñńņňʼnṅṉ")) + call assert_equal("OÒÓÔÕÖØŌŎŐƠǑǪǬỎ", toupper("oòóôõöøōŏőơǒǫǭỏ")) + call assert_equal("PṔṖ", toupper("pṕṗ")) + call assert_equal("Q", toupper("q")) + call assert_equal("RŔŖŘṘṞ", toupper("rŕŗřṙṟ")) + call assert_equal("SŚŜŞŠṠ", toupper("sśŝşšṡ")) + call assert_equal("TŢŤŦṪṮẗ", toupper("tţťŧṫṯẗ")) + call assert_equal("UÙÚÛÜŨŪŬŮŰŲƯǓỦ", toupper("uùúûüũūŭůűųưǔủ")) + call assert_equal("VṼ", toupper("vṽ")) + call assert_equal("WŴẀẂẄẆẘ", toupper("wŵẁẃẅẇẘ")) + call assert_equal("ẊẌ", toupper("ẋẍ")) + call assert_equal("YÝŸŶẎẙỲỶỸ", toupper("yýÿŷẏẙỳỷỹ")) + call assert_equal("ZŹŻŽƵẐẔ", toupper("zźżžƶẑẕ")) + + " Test that uppercase diacritics, which should remain unchanged. + call assert_equal("AÀÁÂÃÄÅĀĂĄǍǞǠẢ", toupper("AÀÁÂÃÄÅĀĂĄǍǞǠẢ")) + call assert_equal("BḂḆ", toupper("BḂḆ")) + call assert_equal("CÇĆĈĊČ", toupper("CÇĆĈĊČ")) + call assert_equal("DĎĐḊḎḐ", toupper("DĎĐḊḎḐ")) + call assert_equal("EÈÉÊËĒĔĖĘĚẺẼ", toupper("EÈÉÊËĒĔĖĘĚẺẼ")) + call assert_equal("FḞ ", toupper("FḞ ")) + call assert_equal("GĜĞĠĢǤǦǴḠ", toupper("GĜĞĠĢǤǦǴḠ")) + call assert_equal("HĤĦḢḦḨ", toupper("HĤĦḢḦḨ")) + call assert_equal("IÌÍÎÏĨĪĬĮİǏỈ", toupper("IÌÍÎÏĨĪĬĮİǏỈ")) + call assert_equal("JĴ", toupper("JĴ")) + call assert_equal("KĶǨḰḴ", toupper("KĶǨḰḴ")) + call assert_equal("LĹĻĽĿŁḺ", toupper("LĹĻĽĿŁḺ")) + call assert_equal("MḾṀ", toupper("MḾṀ")) + call assert_equal("NÑŃŅŇṄṈ", toupper("NÑŃŅŇṄṈ")) + call assert_equal("OÒÓÔÕÖØŌŎŐƠǑǪǬỎ", toupper("OÒÓÔÕÖØŌŎŐƠǑǪǬỎ")) + call assert_equal("PṔṖ", toupper("PṔṖ")) + call assert_equal("Q", toupper("Q")) + call assert_equal("RŔŖŘṘṞ", toupper("RŔŖŘṘṞ")) + call assert_equal("SŚŜŞŠṠ", toupper("SŚŜŞŠṠ")) + call assert_equal("TŢŤŦṪṮ", toupper("TŢŤŦṪṮ")) + call assert_equal("UÙÚÛÜŨŪŬŮŰŲƯǓỦ", toupper("UÙÚÛÜŨŪŬŮŰŲƯǓỦ")) + call assert_equal("VṼ", toupper("VṼ")) + call assert_equal("WŴẀẂẄẆ", toupper("WŴẀẂẄẆ")) + call assert_equal("XẊẌ", toupper("XẊẌ")) + call assert_equal("YÝŶŸẎỲỶỸ", toupper("YÝŶŸẎỲỶỸ")) + call assert_equal("ZŹŻŽƵẐẔ", toupper("ZŹŻŽƵẐẔ")) + + call assert_equal("ⱥ ⱦ", tolower("Ⱥ Ⱦ")) +endfunc + + diff --git a/src/nvim/testdir/test_normal.vim b/src/nvim/testdir/test_normal.vim index a22dca35cc..c529971528 100644 --- a/src/nvim/testdir/test_normal.vim +++ b/src/nvim/testdir/test_normal.vim @@ -1606,6 +1606,40 @@ fun! Test_normal30_changecase() norm! V~ call assert_equal('THIS IS A simple test: äüöss', getline('.')) + " Turkish ASCII turns to multi-byte. On Mac the Turkish locale is available + " but toupper()/tolower() don't do the right thing. + if !has('mac') && !has('osx') + try + lang tr_TR.UTF-8 + set casemap= + call setline(1, 'iI') + 1normal gUU + call assert_equal("\u0130I", getline(1)) + call assert_equal("\u0130I", toupper("iI")) + + call setline(1, 'iI') + 1normal guu + call assert_equal("i\u0131", getline(1)) + call assert_equal("i\u0131", tolower("iI")) + + set casemap& + call setline(1, 'iI') + 1normal gUU + call assert_equal("II", getline(1)) + call assert_equal("II", toupper("iI")) + + call setline(1, 'iI') + 1normal guu + call assert_equal("ii", getline(1)) + call assert_equal("ii", tolower("iI")) + + lang en_US.UTF-8 + catch /E197:/ + " can't use Turkish locale + throw 'Skipped: Turkish locale not available' + endtry + endif + " clean up bw! endfunc |