diff options
-rw-r--r-- | src/nvim/CMakeLists.txt | 1 | ||||
-rw-r--r-- | src/nvim/charset.c | 69 | ||||
-rw-r--r-- | src/nvim/edit.c | 25 | ||||
-rw-r--r-- | src/nvim/eval.c | 28 | ||||
-rw-r--r-- | src/nvim/ex_getln.c | 4 | ||||
-rw-r--r-- | src/nvim/file_search.c | 2 | ||||
-rw-r--r-- | src/nvim/macros.h | 2 | ||||
-rw-r--r-- | src/nvim/mbyte.c | 31 | ||||
-rw-r--r-- | src/nvim/message.c | 6 | ||||
-rw-r--r-- | src/nvim/ops.c | 27 | ||||
-rw-r--r-- | src/nvim/path.c | 6 | ||||
-rw-r--r-- | src/nvim/regexp.c | 45 | ||||
-rw-r--r-- | src/nvim/regexp_nfa.c | 32 | ||||
-rw-r--r-- | src/nvim/search.c | 25 | ||||
-rw-r--r-- | src/nvim/spell.c | 44 | ||||
-rw-r--r-- | src/nvim/spell_defs.h | 4 | ||||
-rw-r--r-- | src/nvim/spellfile.c | 146 | ||||
-rw-r--r-- | src/nvim/strings.c | 51 | ||||
-rw-r--r-- | src/nvim/syntax.c | 124 | ||||
-rw-r--r-- | src/nvim/testdir/test_functions.vim | 144 | ||||
-rw-r--r-- | src/nvim/testdir/test_normal.vim | 34 | ||||
-rw-r--r-- | test/functional/ex_cmds/syntax_spec.lua | 17 | ||||
-rw-r--r-- | test/functional/normal/lang_spec.lua | 59 | ||||
-rw-r--r-- | test/functional/spell/spellfile_spec.lua | 108 |
24 files changed, 685 insertions, 349 deletions
diff --git a/src/nvim/CMakeLists.txt b/src/nvim/CMakeLists.txt index cb003c2026..a91657f1bd 100644 --- a/src/nvim/CMakeLists.txt +++ b/src/nvim/CMakeLists.txt @@ -501,6 +501,7 @@ add_custom_command( ${CMAKE_COMMAND} -DTARGET=${LINT_SUPPRESSES_ROOT} -P ${LINT_SUPPRESSES_INSTALL_SCRIPT} + COMMAND ${CMAKE_COMMAND} -E touch ${LINT_SUPPRESSES_TOUCH_FILE} DEPENDS ${LINT_SUPPRESSES_ARCHIVE} ${LINT_SUPPRESSES_INSTALL_SCRIPT} ) diff --git a/src/nvim/charset.c b/src/nvim/charset.c index 99d3e2dd88..645139f696 100644 --- a/src/nvim/charset.c +++ b/src/nvim/charset.c @@ -212,8 +212,8 @@ int buf_init_chartab(buf_T *buf, int global) // work properly when 'encoding' is "latin1" and the locale is // "C". if (!do_isalpha - || vim_islower(c) - || vim_isupper(c) + || mb_islower(c) + || mb_isupper(c) || (p_altkeymap && (F_isalpha(c) || F_isdigit(c)))) { if (i == 0) { // (re)set ID flag @@ -417,11 +417,11 @@ char_u* str_foldcase(char_u *str, int orglen, char_u *buf, int buflen) while (STR_CHAR(i) != NUL) { int c = utf_ptr2char(STR_PTR(i)); int olen = utf_ptr2len(STR_PTR(i)); - int lc = utf_tolower(c); + int lc = mb_tolower(c); // Only replace the character when it is not an invalid // sequence (ASCII character or more than one byte) and - // utf_tolower() doesn't return the original character. + // mb_tolower() doesn't return the original character. if (((c < 0x80) || (olen > 1)) && (c != lc)) { int nlen = utf_char2len(lc); @@ -1506,67 +1506,6 @@ char_u* skiptohex(char_u *q) return p; } -// Vim's own character class functions. These exist because many library -// islower()/toupper() etc. do not work properly: they crash when used with -// invalid values or can't handle latin1 when the locale is C. -// Speed is most important here. - -/// Check that the character is lower-case -/// -/// @param c character to check -bool vim_islower(int c) - FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT -{ - if (c <= '@') { - return false; - } - - if (c >= 0x80) { - return utf_islower(c); - } - return islower(c); -} - -/// Check that the character is upper-case -/// -/// @param c character to check -bool vim_isupper(int c) - FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT -{ - if (c <= '@') { - return false; - } - - if (c >= 0x80) { - return utf_isupper(c); - } - return isupper(c); -} - -int vim_toupper(int c) -{ - if (c <= '@') { - return c; - } - - if (c >= 0x80) { - return utf_toupper(c); - } - return TOUPPER_LOC(c); -} - -int vim_tolower(int c) -{ - if (c <= '@') { - return c; - } - - if (c >= 0x80) { - return utf_tolower(c); - } - return TOLOWER_LOC(c); -} - /// Skip over text until ' ' or '\t' or NUL /// /// @param[in] p Text to skip over. diff --git a/src/nvim/edit.c b/src/nvim/edit.c index b35504908e..fe00027dec 100644 --- a/src/nvim/edit.c +++ b/src/nvim/edit.c @@ -2037,12 +2037,12 @@ int ins_compl_add_infercase(char_u *str, int len, int icase, char_u *fname, int } else { c = *(p++); } - if (vim_islower(c)) { + if (mb_islower(c)) { has_lower = true; - if (vim_isupper(wca[i])) { + if (mb_isupper(wca[i])) { // Rule 1 is satisfied. for (i = actual_compl_length; i < actual_len; i++) { - wca[i] = vim_tolower(wca[i]); + wca[i] = mb_tolower(wca[i]); } break; } @@ -2062,14 +2062,14 @@ int ins_compl_add_infercase(char_u *str, int len, int icase, char_u *fname, int } else { c = *(p++); } - if (was_letter && vim_isupper(c) && vim_islower(wca[i])) { + if (was_letter && mb_isupper(c) && mb_islower(wca[i])) { // Rule 2 is satisfied. for (i = actual_compl_length; i < actual_len; i++) { - wca[i] = vim_toupper(wca[i]); + wca[i] = mb_toupper(wca[i]); } break; } - was_letter = vim_islower(c) || vim_isupper(c); + was_letter = mb_islower(c) || mb_isupper(c); } } @@ -2082,10 +2082,10 @@ int ins_compl_add_infercase(char_u *str, int len, int icase, char_u *fname, int } else { c = *(p++); } - if (vim_islower(c)) { - wca[i] = vim_tolower(wca[i]); - } else if (vim_isupper(c)) { - wca[i] = vim_toupper(wca[i]); + if (mb_islower(c)) { + wca[i] = mb_tolower(wca[i]); + } else if (mb_isupper(c)) { + wca[i] = mb_toupper(wca[i]); } } } @@ -2302,9 +2302,10 @@ static void ins_compl_longest_match(compl_T *match) c1 = *p; c2 = *s; } - if (match->cp_icase ? (vim_tolower(c1) != vim_tolower(c2)) - : (c1 != c2)) + if (match->cp_icase ? (mb_tolower(c1) != mb_tolower(c2)) + : (c1 != c2)) { break; + } if (has_mbyte) { mb_ptr_adv(p); mb_ptr_adv(s); diff --git a/src/nvim/eval.c b/src/nvim/eval.c index 124d6acfe9..0663e19b9a 100644 --- a/src/nvim/eval.c +++ b/src/nvim/eval.c @@ -16791,30 +16791,9 @@ void timer_teardown(void) */ static void f_tolower(typval_T *argvars, typval_T *rettv, FunPtr fptr) { - char_u *p = (char_u *)xstrdup(tv_get_string(&argvars[0])); rettv->v_type = VAR_STRING; - rettv->vval.v_string = p; - - while (*p != NUL) { - int l; - - if (enc_utf8) { - int c, lc; - - c = utf_ptr2char(p); - lc = utf_tolower(c); - l = utf_ptr2len(p); - /* TODO: reallocate string when byte count changes. */ - if (utf_char2len(lc) == l) - utf_char2bytes(lc, p); - p += l; - } else if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1) - p += l; /* skip multi-byte character */ - else { - *p = TOLOWER_LOC(*p); /* note that tolower() can be a macro */ - ++p; - } - } + rettv->vval.v_string = (char_u *)strcase_save(tv_get_string(&argvars[0]), + false); } /* @@ -16823,7 +16802,8 @@ static void f_tolower(typval_T *argvars, typval_T *rettv, FunPtr fptr) static void f_toupper(typval_T *argvars, typval_T *rettv, FunPtr fptr) { rettv->v_type = VAR_STRING; - rettv->vval.v_string = (char_u *)strup_save(tv_get_string(&argvars[0])); + rettv->vval.v_string = (char_u *)strcase_save(tv_get_string(&argvars[0]), + true); } /* diff --git a/src/nvim/ex_getln.c b/src/nvim/ex_getln.c index 8810204c03..9d74f554ba 100644 --- a/src/nvim/ex_getln.c +++ b/src/nvim/ex_getln.c @@ -1231,7 +1231,7 @@ static int command_line_handle_key(CommandLineState *s) // command line has no uppercase characters, convert // the character to lowercase if (p_ic && p_scs && !pat_has_uppercase(ccline.cmdbuff)) { - s->c = vim_tolower(s->c); + s->c = mb_tolower(s->c); } if (s->c != NUL) { @@ -3018,7 +3018,7 @@ ExpandOne ( || xp->xp_context == EXPAND_FILES || xp->xp_context == EXPAND_SHELLCMD || xp->xp_context == EXPAND_BUFFERS)) { - if (vim_tolower(c0) != vim_tolower(ci)) { + if (mb_tolower(c0) != mb_tolower(ci)) { break; } } else if (c0 != ci) { diff --git a/src/nvim/file_search.c b/src/nvim/file_search.c index 9592235905..db745bdd15 100644 --- a/src/nvim/file_search.c +++ b/src/nvim/file_search.c @@ -1057,7 +1057,7 @@ static bool ff_wc_equal(char_u *s1, char_u *s2) c1 = PTR2CHAR(s1 + i); c2 = PTR2CHAR(s2 + j); - if ((p_fic ? vim_tolower(c1) != vim_tolower(c2) : c1 != c2) + if ((p_fic ? mb_tolower(c1) != mb_tolower(c2) : c1 != c2) && (prev1 != '*' || prev2 != '*')) { return false; } diff --git a/src/nvim/macros.h b/src/nvim/macros.h index a8df6322cf..22fd48de9d 100644 --- a/src/nvim/macros.h +++ b/src/nvim/macros.h @@ -62,7 +62,7 @@ * toupper() and tolower() that use the current locale. * Careful: Only call TOUPPER_LOC() and TOLOWER_LOC() with a character in the * range 0 - 255. toupper()/tolower() on some systems can't handle others. - * Note: It is often better to use vim_tolower() and vim_toupper(), because many + * Note: It is often better to use mb_tolower() and mb_toupper(), because many * toupper() and tolower() implementations only work for ASCII. */ #define TOUPPER_LOC toupper diff --git a/src/nvim/mbyte.c b/src/nvim/mbyte.c index 460528b85f..b18459a2b5 100644 --- a/src/nvim/mbyte.c +++ b/src/nvim/mbyte.c @@ -1174,11 +1174,14 @@ int utf_fold(int a) return utf_convert(a, foldCase, ARRAY_SIZE(foldCase)); } -/* - * Return the upper-case equivalent of "a", which is a UCS-4 character. Use - * simple case folding. - */ -int utf_toupper(int a) +// Vim's own character class functions. These exist because many library +// islower()/toupper() etc. do not work properly: they crash when used with +// invalid values or can't handle latin1 when the locale is C. +// Speed is most important here. + +/// Return the upper-case equivalent of "a", which is a UCS-4 character. Use +/// simple case folding. +int mb_toupper(int a) { /* If 'casemap' contains "keepascii" use ASCII style toupper(). */ if (a < 128 && (cmp_flags & CMP_KEEPASCII)) @@ -1198,17 +1201,15 @@ int utf_toupper(int a) return utf_convert(a, toUpper, ARRAY_SIZE(toUpper)); } -bool utf_islower(int a) +bool mb_islower(int a) { - /* German sharp s is lower case but has no upper case equivalent. */ - return (utf_toupper(a) != a) || a == 0xdf; + // German sharp s is lower case but has no upper case equivalent. + return (mb_toupper(a) != a) || a == 0xdf; } -/* - * Return the lower-case equivalent of "a", which is a UCS-4 character. Use - * simple case folding. - */ -int utf_tolower(int a) +/// Return the lower-case equivalent of "a", which is a UCS-4 character. Use +/// simple case folding. +int mb_tolower(int a) { /* If 'casemap' contains "keepascii" use ASCII style tolower(). */ if (a < 128 && (cmp_flags & CMP_KEEPASCII)) @@ -1228,9 +1229,9 @@ int utf_tolower(int a) return utf_convert(a, toLower, ARRAY_SIZE(toLower)); } -bool utf_isupper(int a) +bool mb_isupper(int a) { - return utf_tolower(a) != a; + return mb_tolower(a) != a; } static int utf_strnicmp(const char_u *s1, const char_u *s2, size_t n1, diff --git a/src/nvim/message.c b/src/nvim/message.c index 1d3609291a..3e4a1e10b6 100644 --- a/src/nvim/message.c +++ b/src/nvim/message.c @@ -2730,8 +2730,8 @@ do_dialog ( break; } - /* Make the character lowercase, as chars in "hotkeys" are. */ - c = vim_tolower(c); + // Make the character lowercase, as chars in "hotkeys" are. + c = mb_tolower(c); retval = 1; for (i = 0; hotkeys[i]; ++i) { if (has_mbyte) { @@ -2777,7 +2777,7 @@ copy_char ( if (has_mbyte) { if (lowercase) { - c = vim_tolower((*mb_ptr2char)(from)); + c = mb_tolower((*mb_ptr2char)(from)); return (*mb_char2bytes)(c, to); } else { len = (*mb_ptr2len)(from); diff --git a/src/nvim/ops.c b/src/nvim/ops.c index 68ef27222c..f11d6b69b2 100644 --- a/src/nvim/ops.c +++ b/src/nvim/ops.c @@ -1956,16 +1956,18 @@ int swapchar(int op_type, pos_T *pos) if (enc_dbcs != 0 && c >= 0x100) /* No lower/uppercase letter */ return FALSE; nc = c; - if (vim_islower(c)) { - if (op_type == OP_ROT13) + if (mb_islower(c)) { + if (op_type == OP_ROT13) { nc = ROT13(c, 'a'); - else if (op_type != OP_LOWER) - nc = vim_toupper(c); - } else if (vim_isupper(c)) { - if (op_type == OP_ROT13) + } else if (op_type != OP_LOWER) { + nc = mb_toupper(c); + } + } else if (mb_isupper(c)) { + if (op_type == OP_ROT13) { nc = ROT13(c, 'A'); - else if (op_type != OP_UPPER) - nc = vim_tolower(c); + } else if (op_type != OP_UPPER) { + nc = mb_tolower(c); + } } if (nc != c) { if (enc_utf8 && (c >= 0x80 || nc >= 0x80)) { @@ -3327,10 +3329,11 @@ void ex_display(exarg_T *eap) get_clipboard(name, &yb, true); - if (name == vim_tolower(redir_reg) - || (redir_reg == '"' && yb == y_previous)) - continue; /* do not list register being written to, the - * pointer can be freed */ + if (name == mb_tolower(redir_reg) + || (redir_reg == '"' && yb == y_previous)) { + continue; // do not list register being written to, the + // pointer can be freed + } if (yb->y_array != NULL) { msg_putchar('\n'); diff --git a/src/nvim/path.c b/src/nvim/path.c index 6bf42ed2fa..205fc2ed62 100644 --- a/src/nvim/path.c +++ b/src/nvim/path.c @@ -1853,7 +1853,7 @@ int pathcmp(const char *p, const char *q, int maxlen) break; } - if ((p_fic ? vim_toupper(c1) != vim_toupper(c2) : c1 != c2) + if ((p_fic ? mb_toupper(c1) != mb_toupper(c2) : c1 != c2) #ifdef BACKSLASH_IN_FILENAME /* consider '/' and '\\' to be equal */ && !((c1 == '/' && c2 == '\\') @@ -1864,8 +1864,8 @@ int pathcmp(const char *p, const char *q, int maxlen) return -1; if (vim_ispathsep(c2)) return 1; - return p_fic ? vim_toupper(c1) - vim_toupper(c2) - : c1 - c2; /* no match */ + return p_fic ? mb_toupper(c1) - mb_toupper(c2) + : c1 - c2; // no match } i += MB_PTR2LEN((char_u *)p + i); diff --git a/src/nvim/regexp.c b/src/nvim/regexp.c index 9baa53d2a2..4b5e17b00b 100644 --- a/src/nvim/regexp.c +++ b/src/nvim/regexp.c @@ -2350,7 +2350,7 @@ collection: break; case CLASS_LOWER: for (cu = 1; cu <= 255; cu++) { - if (vim_islower(cu) && cu != 170 && cu != 186) { + if (mb_islower(cu) && cu != 170 && cu != 186) { regmbc(cu); } } @@ -2376,7 +2376,7 @@ collection: break; case CLASS_UPPER: for (cu = 1; cu <= 255; cu++) { - if (vim_isupper(cu)) { + if (mb_isupper(cu)) { regmbc(cu); } } @@ -3474,7 +3474,7 @@ static long bt_regexec_both(char_u *line, || (ireg_ic && (((enc_utf8 && utf_fold(prog->regstart) == utf_fold(c))) || (c < 255 && prog->regstart < 255 - && vim_tolower(prog->regstart) == vim_tolower(c))))) { + && mb_tolower(prog->regstart) == mb_tolower(c))))) { retval = regtry(prog, col); } else { retval = 0; @@ -4155,7 +4155,7 @@ regmatch ( if (*opnd != *reginput && (!ireg_ic || (!enc_utf8 - && vim_tolower(*opnd) != vim_tolower(*reginput)))) { + && mb_tolower(*opnd) != mb_tolower(*reginput)))) { status = RA_NOMATCH; } else if (*opnd == NUL) { // match empty string always works; happens when "~" is @@ -4573,12 +4573,14 @@ regmatch ( if (OP(next) == EXACTLY) { rst.nextb = *OPERAND(next); if (ireg_ic) { - if (vim_isupper(rst.nextb)) - rst.nextb_ic = vim_tolower(rst.nextb); - else - rst.nextb_ic = vim_toupper(rst.nextb); - } else + if (mb_isupper(rst.nextb)) { + rst.nextb_ic = mb_tolower(rst.nextb); + } else { + rst.nextb_ic = mb_toupper(rst.nextb); + } + } else { rst.nextb_ic = rst.nextb; + } } else { rst.nextb = NUL; rst.nextb_ic = NUL; @@ -5339,8 +5341,8 @@ do_class: * would have been used for it. It does handle single-byte * characters, such as latin1. */ if (ireg_ic) { - cu = vim_toupper(*opnd); - cl = vim_tolower(*opnd); + cu = mb_toupper(*opnd); + cl = mb_tolower(*opnd); while (count < maxcount && (*scan == cu || *scan == cl)) { count++; scan++; @@ -6312,14 +6314,15 @@ static char_u *cstrchr(char_u *s, int c) /* tolower() and toupper() can be slow, comparing twice should be a lot * faster (esp. when using MS Visual C++!). * For UTF-8 need to use folded case. */ - if (enc_utf8 && c > 0x80) + if (c > 0x80) { cc = utf_fold(c); - else if (vim_isupper(c)) - cc = vim_tolower(c); - else if (vim_islower(c)) - cc = vim_toupper(c); - else + } else if (mb_isupper(c)) { + cc = mb_tolower(c); + } else if (mb_islower(c)) { + cc = mb_toupper(c); + } else { return vim_strchr(s, c); + } if (has_mbyte) { for (p = s; *p != NUL; p += (*mb_ptr2len)(p)) { @@ -6348,28 +6351,28 @@ static char_u *cstrchr(char_u *s, int c) static fptr_T do_upper(int *d, int c) { - *d = vim_toupper(c); + *d = mb_toupper(c); return (fptr_T)NULL; } static fptr_T do_Upper(int *d, int c) { - *d = vim_toupper(c); + *d = mb_toupper(c); return (fptr_T)do_Upper; } static fptr_T do_lower(int *d, int c) { - *d = vim_tolower(c); + *d = mb_tolower(c); return (fptr_T)NULL; } static fptr_T do_Lower(int *d, int c) { - *d = vim_tolower(c); + *d = mb_tolower(c); return (fptr_T)do_Lower; } diff --git a/src/nvim/regexp_nfa.c b/src/nvim/regexp_nfa.c index 5b49ab38f0..caf26fdd35 100644 --- a/src/nvim/regexp_nfa.c +++ b/src/nvim/regexp_nfa.c @@ -4373,7 +4373,7 @@ static int check_char_class(int class, int c) return OK; break; case NFA_CLASS_LOWER: - if (vim_islower(c) && c != 170 && c != 186) { + if (mb_islower(c) && c != 170 && c != 186) { return OK; } break; @@ -4391,8 +4391,9 @@ static int check_char_class(int class, int c) return OK; break; case NFA_CLASS_UPPER: - if (vim_isupper(c)) + if (mb_isupper(c)) { return OK; + } break; case NFA_CLASS_XDIGIT: if (ascii_isxdigit(c)) @@ -4892,7 +4893,7 @@ static long find_match_text(colnr_T startcol, int regstart, char_u *match_text) int c2_len = PTR2LEN(s2); int c2 = PTR2CHAR(s2); - if ((c1 != c2 && (!ireg_ic || vim_tolower(c1) != vim_tolower(c2))) + if ((c1 != c2 && (!ireg_ic || mb_tolower(c1) != mb_tolower(c2))) || c1_len != c2_len) { match = false; break; @@ -5585,22 +5586,24 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, break; } if (ireg_ic) { - int curc_low = vim_tolower(curc); - int done = FALSE; + int curc_low = mb_tolower(curc); + int done = false; - for (; c1 <= c2; ++c1) - if (vim_tolower(c1) == curc_low) { + for (; c1 <= c2; c1++) { + if (mb_tolower(c1) == curc_low) { result = result_if_matched; done = TRUE; break; } - if (done) + } + if (done) { break; + } } } else if (state->c < 0 ? check_char_class(state->c, curc) : (curc == state->c - || (ireg_ic && vim_tolower(curc) - == vim_tolower(state->c)))) { + || (ireg_ic && mb_tolower(curc) + == mb_tolower(state->c)))) { result = result_if_matched; break; } @@ -6003,8 +6006,9 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, #endif result = (c == curc); - if (!result && ireg_ic) - result = vim_tolower(c) == vim_tolower(curc); + if (!result && ireg_ic) { + result = mb_tolower(c) == mb_tolower(curc); + } // If ireg_icombine is not set only skip over the character // itself. When it is set skip over composing characters. @@ -6152,8 +6156,8 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, // Checking if the required start character matches is // cheaper than adding a state that won't match. c = PTR2CHAR(reginput + clen); - if (c != prog->regstart && (!ireg_ic || vim_tolower(c) - != vim_tolower(prog->regstart))) { + if (c != prog->regstart && (!ireg_ic || mb_tolower(c) + != mb_tolower(prog->regstart))) { #ifdef REGEXP_DEBUG fprintf(log_fd, " Skipping start state, regstart does not match\n"); diff --git a/src/nvim/search.c b/src/nvim/search.c index c5c92b41c5..91a558045f 100644 --- a/src/nvim/search.c +++ b/src/nvim/search.c @@ -335,23 +335,26 @@ int pat_has_uppercase(char_u *pat) while (*p != NUL) { int l; - if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1) { - if (enc_utf8 && utf_isupper(utf_ptr2char(p))) - return TRUE; + if ((l = mb_ptr2len(p)) > 1) { + if (mb_isupper(utf_ptr2char(p))) { + return true; + } p += l; } else if (*p == '\\') { - if (p[1] == '_' && p[2] != NUL) /* skip "\_X" */ + if (p[1] == '_' && p[2] != NUL) { // skip "\_X" p += 3; - else if (p[1] == '%' && p[2] != NUL) /* skip "\%X" */ + } else if (p[1] == '%' && p[2] != NUL) { // skip "\%X" p += 3; - else if (p[1] != NUL) /* skip "\X" */ + } else if (p[1] != NUL) { // skip "\X" p += 2; - else + } else { p += 1; - } else if (vim_isupper(*p)) - return TRUE; - else - ++p; + } + } else if (mb_isupper(*p)) { + return true; + } else { + p++; + } } return FALSE; } diff --git a/src/nvim/spell.c b/src/nvim/spell.c index d4f49bffb2..18febda1d8 100644 --- a/src/nvim/spell.c +++ b/src/nvim/spell.c @@ -2526,8 +2526,7 @@ void clear_spell_chartab(spelltab_T *sp) } } -// Init the chartab used for spelling. Only depends on 'encoding'. -// Called once while starting up and when 'encoding' changes. +// Init the chartab used for spelling. Called once while starting up. // The default is to use isalpha(), but the spell file should define the word // characters to make it possible that 'encoding' differs from the current // locale. For utf-8 we don't use isalpha() but our own functions. @@ -2537,36 +2536,17 @@ void init_spell_chartab(void) did_set_spelltab = false; clear_spell_chartab(&spelltab); - if (enc_dbcs) { - // DBCS: assume double-wide characters are word characters. - for (i = 128; i <= 255; ++i) - if (MB_BYTE2LEN(i) == 2) - spelltab.st_isw[i] = true; - } else if (enc_utf8) { - for (i = 128; i < 256; ++i) { - int f = utf_fold(i); - int u = utf_toupper(i); - - spelltab.st_isu[i] = utf_isupper(i); - spelltab.st_isw[i] = spelltab.st_isu[i] || utf_islower(i); - // The folded/upper-cased value is different between latin1 and - // utf8 for 0xb5, causing E763 for no good reason. Use the latin1 - // value for utf-8 to avoid this. - spelltab.st_fold[i] = (f < 256) ? f : i; - spelltab.st_upper[i] = (u < 256) ? u : i; - } - } else { - // Rough guess: use locale-dependent library functions. - for (i = 128; i < 256; ++i) { - if (vim_isupper(i)) { - spelltab.st_isw[i] = true; - spelltab.st_isu[i] = true; - spelltab.st_fold[i] = vim_tolower(i); - } else if (vim_islower(i)) { - spelltab.st_isw[i] = true; - spelltab.st_upper[i] = vim_toupper(i); - } - } + for (i = 128; i < 256; i++) { + int f = utf_fold(i); + int u = mb_toupper(i); + + spelltab.st_isu[i] = mb_isupper(i); + spelltab.st_isw[i] = spelltab.st_isu[i] || mb_islower(i); + // The folded/upper-cased value is different between latin1 and + // utf8 for 0xb5, causing E763 for no good reason. Use the latin1 + // value for utf-8 to avoid this. + spelltab.st_fold[i] = (f < 256) ? f : i; + spelltab.st_upper[i] = (u < 256) ? u : i; } } diff --git a/src/nvim/spell_defs.h b/src/nvim/spell_defs.h index c54a7f5390..ddd54c724e 100644 --- a/src/nvim/spell_defs.h +++ b/src/nvim/spell_defs.h @@ -265,11 +265,11 @@ typedef struct trystate_S { : (c) < \ 256 ? (int)spelltab.st_fold[c] : (int)towlower(c)) -#define SPELL_TOUPPER(c) (enc_utf8 && (c) >= 128 ? utf_toupper(c) \ +#define SPELL_TOUPPER(c) (enc_utf8 && (c) >= 128 ? mb_toupper(c) \ : (c) < \ 256 ? (int)spelltab.st_upper[c] : (int)towupper(c)) -#define SPELL_ISUPPER(c) (enc_utf8 && (c) >= 128 ? utf_isupper(c) \ +#define SPELL_ISUPPER(c) (enc_utf8 && (c) >= 128 ? mb_isupper(c) \ : (c) < 256 ? spelltab.st_isu[c] : iswupper(c)) // First language that is loaded, start of the linked list of loaded diff --git a/src/nvim/spellfile.c b/src/nvim/spellfile.c index bbef1f5032..1da71dc4f9 100644 --- a/src/nvim/spellfile.c +++ b/src/nvim/spellfile.c @@ -225,6 +225,7 @@ #include <stdio.h> #include <stdint.h> #include <wctype.h> +#include <strings.h> #include "nvim/vim.h" #include "nvim/spell_defs.h" @@ -267,7 +268,7 @@ #define SAL_REM_ACCENTS 4 #define VIMSPELLMAGIC "VIMspell" // string at start of Vim spell file -#define VIMSPELLMAGICL 8 +#define VIMSPELLMAGICL (sizeof(VIMSPELLMAGIC) - 1) #define VIMSPELLVERSION 50 // Section IDs. Only renumber them when VIMSPELLVERSION changes! @@ -494,6 +495,64 @@ typedef struct spellinfo_S { # include "spellfile.c.generated.h" #endif +/// Read n bytes from fd to buf, returning on errors +/// +/// @param[out] buf Buffer to read to, must be at least n bytes long. +/// @param[in] n Amount of bytes to read. +/// @param fd FILE* to read from. +/// @param exit_code Code to run before returning. +/// +/// @return Allows to proceed if everything is OK, returns SP_TRUNCERROR if +/// there are not enough bytes, returns SP_OTHERERROR if reading failed. +#define SPELL_READ_BYTES(buf, n, fd, exit_code) \ + do { \ + const size_t n__SPRB = (n); \ + FILE *const fd__SPRB = (fd); \ + char *const buf__SPRB = (buf); \ + const size_t read_bytes__SPRB = fread(buf__SPRB, 1, n__SPRB, fd__SPRB); \ + if (read_bytes__SPRB != n__SPRB) { \ + exit_code; \ + return feof(fd__SPRB) ? SP_TRUNCERROR : SP_OTHERERROR; \ + } \ + } while (0) + +/// Like #SPELL_READ_BYTES, but also error out if NUL byte was read +/// +/// @return Allows to proceed if everything is OK, returns SP_TRUNCERROR if +/// there are not enough bytes, returns SP_OTHERERROR if reading failed, +/// returns SP_FORMERROR if read out a NUL byte. +#define SPELL_READ_NONNUL_BYTES(buf, n, fd, exit_code) \ + do { \ + const size_t n__SPRNB = (n); \ + FILE *const fd__SPRNB = (fd); \ + char *const buf__SPRNB = (buf); \ + SPELL_READ_BYTES(buf__SPRNB, n__SPRNB, fd__SPRNB, exit_code); \ + if (memchr(buf__SPRNB, NUL, (size_t)n__SPRNB)) { \ + exit_code; \ + return SP_FORMERROR; \ + } \ + } while (0) + +/// Check that spell file starts with a magic string +/// +/// Does not check for version of the file. +/// +/// @param fd File to check. +/// +/// @return 0 in case of success, SP_TRUNCERROR if file contains not enough +/// bytes, SP_FORMERROR if it does not match magic string and +/// SP_OTHERERROR if reading file failed. +static inline int spell_check_magic_string(FILE *const fd) + FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_ALWAYS_INLINE +{ + char buf[VIMSPELLMAGICL]; + SPELL_READ_BYTES(buf, VIMSPELLMAGICL, fd, ;); + if (memcmp(buf, VIMSPELLMAGIC, VIMSPELLMAGICL) != 0) { + return SP_FORMERROR; + } + return 0; +} + // Load one spell file and store the info into a slang_T. // // This is invoked in three ways: @@ -514,9 +573,7 @@ spell_load_file ( ) { FILE *fd; - char_u buf[VIMSPELLMAGICL]; char_u *p; - int i; int n; int len; char_u *save_sourcing_name = sourcing_name; @@ -558,11 +615,20 @@ spell_load_file ( sourcing_lnum = 0; // <HEADER>: <fileID> - for (i = 0; i < VIMSPELLMAGICL; ++i) - buf[i] = getc(fd); // <fileID> - if (STRNCMP(buf, VIMSPELLMAGIC, VIMSPELLMAGICL) != 0) { - EMSG(_("E757: This does not look like a spell file")); - goto endFAIL; + const int scms_ret = spell_check_magic_string(fd); + switch (scms_ret) { + case SP_FORMERROR: + case SP_TRUNCERROR: { + emsgf(_("E757: This does not look like a spell file")); + goto endFAIL; + } + case SP_OTHERERROR: { + emsgf(_("E5042: Failed to read spell file %s: %s"), + fname, strerror(ferror(fd))); + } + case 0: { + break; + } } c = getc(fd); // <versionnr> if (c < VIMSPELLVERSION) { @@ -935,12 +1001,10 @@ static char_u *read_cnt_string(FILE *fd, int cnt_bytes, int *cntp) // Return SP_*ERROR flags. static int read_region_section(FILE *fd, slang_T *lp, int len) { - int i; - - if (len > 16) + if (len > 16) { return SP_FORMERROR; - for (i = 0; i < len; ++i) - lp->sl_regions[i] = getc(fd); // <regionname> + } + SPELL_READ_NONNUL_BYTES((char *)lp->sl_regions, (size_t)len, fd, ;); lp->sl_regions[len] = NUL; return 0; } @@ -983,35 +1047,30 @@ static int read_charflags_section(FILE *fd) // Return SP_*ERROR flags. static int read_prefcond_section(FILE *fd, slang_T *lp) { - int cnt; - int i; - int n; - char_u *p; - char_u buf[MAXWLEN + 1]; - // <prefcondcnt> <prefcond> ... - cnt = get2c(fd); // <prefcondcnt> - if (cnt <= 0) + const int cnt = get2c(fd); // <prefcondcnt> + if (cnt <= 0) { return SP_FORMERROR; + } lp->sl_prefprog = xcalloc(cnt, sizeof(regprog_T *)); lp->sl_prefixcnt = cnt; - for (i = 0; i < cnt; ++i) { + for (int i = 0; i < cnt; i++) { // <prefcond> : <condlen> <condstr> - n = getc(fd); // <condlen> - if (n < 0 || n >= MAXWLEN) + const int n = getc(fd); // <condlen> + if (n < 0 || n >= MAXWLEN) { return SP_FORMERROR; + } // When <condlen> is zero we have an empty condition. Otherwise // compile the regexp program used to check for the condition. if (n > 0) { - buf[0] = '^'; // always match at one position only - p = buf + 1; - while (n-- > 0) - *p++ = getc(fd); // <condstr> - *p = NUL; - lp->sl_prefprog[i] = vim_regcomp(buf, RE_MAGIC + RE_STRING); + char buf[MAXWLEN + 1]; + buf[0] = '^'; // always match at one position only + SPELL_READ_NONNUL_BYTES(buf + 1, (size_t)n, fd, ;); + buf[n + 1] = NUL; + lp->sl_prefprog[i] = vim_regcomp((char_u *)buf, RE_MAGIC | RE_STRING); } } return 0; @@ -1064,7 +1123,6 @@ static int read_rep_section(FILE *fd, garray_T *gap, int16_t *first) // Return SP_*ERROR flags. static int read_sal_section(FILE *fd, slang_T *slang) { - int i; int cnt; garray_T *gap; salitem_T *smp; @@ -1074,13 +1132,16 @@ static int read_sal_section(FILE *fd, slang_T *slang) slang->sl_sofo = false; - i = getc(fd); // <salflags> - if (i & SAL_F0LLOWUP) + const int flags = getc(fd); // <salflags> + if (flags & SAL_F0LLOWUP) { slang->sl_followup = true; - if (i & SAL_COLLAPSE) + } + if (flags & SAL_COLLAPSE) { slang->sl_collapse = true; - if (i & SAL_REM_ACCENTS) + } + if (flags & SAL_REM_ACCENTS) { slang->sl_rem_accents = true; + } cnt = get2c(fd); // <salcount> if (cnt < 0) @@ -1100,7 +1161,8 @@ static int read_sal_section(FILE *fd, slang_T *slang) smp->sm_lead = p; // Read up to the first special char into sm_lead. - for (i = 0; i < ccnt; ++i) { + int i = 0; + for (; i < ccnt; ++i) { c = getc(fd); // <salfrom> if (vim_strchr((char_u *)"0123456789(-<^$", c) != NULL) break; @@ -1126,11 +1188,17 @@ static int read_sal_section(FILE *fd, slang_T *slang) // Any following chars go in sm_rules. smp->sm_rules = p; - if (i < ccnt) + if (i < ccnt) { // store the char we got while checking for end of sm_lead *p++ = c; - for (++i; i < ccnt; ++i) - *p++ = getc(fd); // <salfrom> + } + i++; + if (i < ccnt) { + SPELL_READ_NONNUL_BYTES( // <salfrom> + (char *)p, (size_t)(ccnt - i), fd, xfree(smp->sm_lead)); + p += (ccnt - i); + i = ccnt; + } *p++ = NUL; // <saltolen> <salto> diff --git a/src/nvim/strings.c b/src/nvim/strings.c index 5dcffe00e0..87e066d80a 100644 --- a/src/nvim/strings.c +++ b/src/nvim/strings.c @@ -291,14 +291,15 @@ void vim_strup(char_u *p) } } -/// Make given string all upper-case +/// Make given string all upper-case or all lower-case /// -/// Handels multi-byte characters as good as possible. +/// Handles multi-byte characters as good as possible. /// /// @param[in] orig Input string. +/// @param[in] upper If true make uppercase, otherwise lowercase /// /// @return [allocated] upper-cased string. -char *strup_save(const char *const orig) +char *strcase_save(const char *const orig, bool upper) FUNC_ATTR_NONNULL_RET FUNC_ATTR_MALLOC FUNC_ATTR_NONNULL_ALL { char *res = xstrdup(orig); @@ -307,33 +308,25 @@ char *strup_save(const char *const orig) while (*p != NUL) { int l; - if (enc_utf8) { - int c = utf_ptr2char((const char_u *)p); - int uc = utf_toupper(c); - - // Reallocate string when byte count changes. This is rare, - // thus it's OK to do another malloc()/free(). - l = utf_ptr2len((const char_u *)p); - int newl = utf_char2len(uc); - if (newl != l) { - // TODO(philix): use xrealloc() in strup_save() - char *s = xmalloc(STRLEN(res) + (size_t)(1 + newl - l)); - memcpy(s, res, (size_t)(p - res)); - STRCPY(s + (p - res) + newl, p + l); - p = s + (p - res); - xfree(res); - res = s; - } - - utf_char2bytes(uc, (char_u *)p); - p += newl; - } else if (has_mbyte && (l = (*mb_ptr2len)((const char_u *)p)) > 1) { - p += l; // Skip multi-byte character. - } else { - // note that toupper() can be a macro - *p = (char)(uint8_t)TOUPPER_LOC(*p); - p++; + int c = utf_ptr2char((const char_u *)p); + int uc = upper ? mb_toupper(c) : mb_tolower(c); + + // Reallocate string when byte count changes. This is rare, + // thus it's OK to do another malloc()/free(). + l = utf_ptr2len((const char_u *)p); + int newl = utf_char2len(uc); + if (newl != l) { + // TODO(philix): use xrealloc() in strup_save() + char *s = xmalloc(STRLEN(res) + (size_t)(1 + newl - l)); + memcpy(s, res, (size_t)(p - res)); + STRCPY(s + (p - res) + newl, p + l); + p = s + (p - res); + xfree(res); + res = s; } + + utf_char2bytes(uc, (char_u *)p); + p += newl; } return res; diff --git a/src/nvim/syntax.c b/src/nvim/syntax.c index e36b00d770..1ed65ec52a 100644 --- a/src/nvim/syntax.c +++ b/src/nvim/syntax.c @@ -4246,83 +4246,81 @@ static void syn_cmd_keyword(exarg_T *eap, int syncing) if (rest != NULL) { syn_id = syn_check_group(arg, (int)(group_name_end - arg)); - if (syn_id != 0) - /* allocate a buffer, for removing backslashes in the keyword */ + if (syn_id != 0) { + // Allocate a buffer, for removing backslashes in the keyword. keyword_copy = xmalloc(STRLEN(rest) + 1); - syn_opt_arg.flags = 0; - syn_opt_arg.keyword = TRUE; - syn_opt_arg.sync_idx = NULL; - syn_opt_arg.has_cont_list = FALSE; - syn_opt_arg.cont_in_list = NULL; - syn_opt_arg.next_list = NULL; - - /* - * The options given apply to ALL keywords, so all options must be - * found before keywords can be created. - * 1: collect the options and copy the keywords to keyword_copy. - */ - cnt = 0; - p = keyword_copy; - for (; rest != NULL && !ends_excmd(*rest); rest = skipwhite(rest)) { - rest = get_syn_options(rest, &syn_opt_arg, &conceal_char); - if (rest == NULL || ends_excmd(*rest)) - break; - /* Copy the keyword, removing backslashes, and add a NUL. */ - while (*rest != NUL && !ascii_iswhite(*rest)) { - if (*rest == '\\' && rest[1] != NUL) - ++rest; - *p++ = *rest++; - } - *p++ = NUL; - ++cnt; } + if (keyword_copy != NULL) { + syn_opt_arg.flags = 0; + syn_opt_arg.keyword = true; + syn_opt_arg.sync_idx = NULL; + syn_opt_arg.has_cont_list = false; + syn_opt_arg.cont_in_list = NULL; + syn_opt_arg.next_list = NULL; + + // The options given apply to ALL keywords, so all options must be + // found before keywords can be created. + // 1: collect the options and copy the keywords to keyword_copy. + cnt = 0; + p = keyword_copy; + for (; rest != NULL && !ends_excmd(*rest); rest = skipwhite(rest)) { + rest = get_syn_options(rest, &syn_opt_arg, &conceal_char); + if (rest == NULL || ends_excmd(*rest)) { + break; + } + // Copy the keyword, removing backslashes, and add a NUL. + while (*rest != NUL && !ascii_iswhite(*rest)) { + if (*rest == '\\' && rest[1] != NUL) { + rest++; + } + *p++ = *rest++; + } + *p++ = NUL; + cnt++; + } - if (!eap->skip) { - /* Adjust flags for use of ":syn include". */ - syn_incl_toplevel(syn_id, &syn_opt_arg.flags); + if (!eap->skip) { + // Adjust flags for use of ":syn include". + syn_incl_toplevel(syn_id, &syn_opt_arg.flags); - /* - * 2: Add an entry for each keyword. - */ - for (kw = keyword_copy; --cnt >= 0; kw += STRLEN(kw) + 1) { - for (p = vim_strchr(kw, '[');; ) { - if (p != NULL) - *p = NUL; - add_keyword(kw, syn_id, syn_opt_arg.flags, - syn_opt_arg.cont_in_list, - syn_opt_arg.next_list, conceal_char); - if (p == NULL) - break; - if (p[1] == NUL) { - EMSG2(_("E789: Missing ']': %s"), kw); - goto error; - } - if (p[1] == ']') { - if (p[2] != NUL) { - EMSG3(_("E890: trailing char after ']': %s]%s"), - kw, &p[2]); + // 2: Add an entry for each keyword. + for (kw = keyword_copy; --cnt >= 0; kw += STRLEN(kw) + 1) { + for (p = vim_strchr(kw, '[');; ) { + if (p != NULL) { + *p = NUL; + } + add_keyword(kw, syn_id, syn_opt_arg.flags, + syn_opt_arg.cont_in_list, + syn_opt_arg.next_list, conceal_char); + if (p == NULL) { + break; + } + if (p[1] == NUL) { + emsgf(_("E789: Missing ']': %s"), kw); goto error; } - kw = p + 1; - break; // skip over the "]" - } - if (has_mbyte) { - int l = (*mb_ptr2len)(p + 1); + if (p[1] == ']') { + if (p[2] != NUL) { + emsgf(_("E890: trailing char after ']': %s]%s"), + kw, &p[2]); + goto error; + } + kw = p + 1; + break; // skip over the "]" + } + const int l = (*mb_ptr2len)(p + 1); memmove(p, p + 1, l); p += l; - } else { - p[0] = p[1]; - ++p; } } } - } error: - xfree(keyword_copy); - xfree(syn_opt_arg.cont_in_list); - xfree(syn_opt_arg.next_list); + xfree(keyword_copy); + xfree(syn_opt_arg.cont_in_list); + xfree(syn_opt_arg.next_list); + } } if (rest != NULL) diff --git a/src/nvim/testdir/test_functions.vim b/src/nvim/testdir/test_functions.vim index 81cb6314ce..3c258299c1 100644 --- a/src/nvim/testdir/test_functions.vim +++ b/src/nvim/testdir/test_functions.vim @@ -29,3 +29,147 @@ func Test_setbufvar_options() bwipe! endfunc +func Test_tolower() + call assert_equal("", tolower("")) + + " Test with all printable ASCII characters. + call assert_equal(' !"#$%&''()*+,-./0123456789:;<=>?@abcdefghijklmnopqrstuvwxyz[\]^_`abcdefghijklmnopqrstuvwxyz{|}~', + \ tolower(' !"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~')) + + if !has('multi_byte') + return + endif + + " Test with a few uppercase diacritics. + call assert_equal("aàáâãäåāăąǎǟǡả", tolower("AÀÁÂÃÄÅĀĂĄǍǞǠẢ")) + call assert_equal("bḃḇ", tolower("BḂḆ")) + call assert_equal("cçćĉċč", tolower("CÇĆĈĊČ")) + call assert_equal("dďđḋḏḑ", tolower("DĎĐḊḎḐ")) + call assert_equal("eèéêëēĕėęěẻẽ", tolower("EÈÉÊËĒĔĖĘĚẺẼ")) + call assert_equal("fḟ ", tolower("FḞ ")) + call assert_equal("gĝğġģǥǧǵḡ", tolower("GĜĞĠĢǤǦǴḠ")) + call assert_equal("hĥħḣḧḩ", tolower("HĤĦḢḦḨ")) + call assert_equal("iìíîïĩīĭįiǐỉ", tolower("IÌÍÎÏĨĪĬĮİǏỈ")) + call assert_equal("jĵ", tolower("JĴ")) + call assert_equal("kķǩḱḵ", tolower("KĶǨḰḴ")) + call assert_equal("lĺļľŀłḻ", tolower("LĹĻĽĿŁḺ")) + call assert_equal("mḿṁ", tolower("MḾṀ")) + call assert_equal("nñńņňṅṉ", tolower("NÑŃŅŇṄṈ")) + call assert_equal("oòóôõöøōŏőơǒǫǭỏ", tolower("OÒÓÔÕÖØŌŎŐƠǑǪǬỎ")) + call assert_equal("pṕṗ", tolower("PṔṖ")) + call assert_equal("q", tolower("Q")) + call assert_equal("rŕŗřṙṟ", tolower("RŔŖŘṘṞ")) + call assert_equal("sśŝşšṡ", tolower("SŚŜŞŠṠ")) + call assert_equal("tţťŧṫṯ", tolower("TŢŤŦṪṮ")) + call assert_equal("uùúûüũūŭůűųưǔủ", tolower("UÙÚÛÜŨŪŬŮŰŲƯǓỦ")) + call assert_equal("vṽ", tolower("VṼ")) + call assert_equal("wŵẁẃẅẇ", tolower("WŴẀẂẄẆ")) + call assert_equal("xẋẍ", tolower("XẊẌ")) + call assert_equal("yýŷÿẏỳỷỹ", tolower("YÝŶŸẎỲỶỸ")) + call assert_equal("zźżžƶẑẕ", tolower("ZŹŻŽƵẐẔ")) + + " Test with a few lowercase diacritics, which should remain unchanged. + call assert_equal("aàáâãäåāăąǎǟǡả", tolower("aàáâãäåāăąǎǟǡả")) + call assert_equal("bḃḇ", tolower("bḃḇ")) + call assert_equal("cçćĉċč", tolower("cçćĉċč")) + call assert_equal("dďđḋḏḑ", tolower("dďđḋḏḑ")) + call assert_equal("eèéêëēĕėęěẻẽ", tolower("eèéêëēĕėęěẻẽ")) + call assert_equal("fḟ", tolower("fḟ")) + call assert_equal("gĝğġģǥǧǵḡ", tolower("gĝğġģǥǧǵḡ")) + call assert_equal("hĥħḣḧḩẖ", tolower("hĥħḣḧḩẖ")) + call assert_equal("iìíîïĩīĭįǐỉ", tolower("iìíîïĩīĭįǐỉ")) + call assert_equal("jĵǰ", tolower("jĵǰ")) + call assert_equal("kķǩḱḵ", tolower("kķǩḱḵ")) + call assert_equal("lĺļľŀłḻ", tolower("lĺļľŀłḻ")) + call assert_equal("mḿṁ ", tolower("mḿṁ ")) + call assert_equal("nñńņňʼnṅṉ", tolower("nñńņňʼnṅṉ")) + call assert_equal("oòóôõöøōŏőơǒǫǭỏ", tolower("oòóôõöøōŏőơǒǫǭỏ")) + call assert_equal("pṕṗ", tolower("pṕṗ")) + call assert_equal("q", tolower("q")) + call assert_equal("rŕŗřṙṟ", tolower("rŕŗřṙṟ")) + call assert_equal("sśŝşšṡ", tolower("sśŝşšṡ")) + call assert_equal("tţťŧṫṯẗ", tolower("tţťŧṫṯẗ")) + call assert_equal("uùúûüũūŭůűųưǔủ", tolower("uùúûüũūŭůűųưǔủ")) + call assert_equal("vṽ", tolower("vṽ")) + call assert_equal("wŵẁẃẅẇẘ", tolower("wŵẁẃẅẇẘ")) + call assert_equal("ẋẍ", tolower("ẋẍ")) + call assert_equal("yýÿŷẏẙỳỷỹ", tolower("yýÿŷẏẙỳỷỹ")) + call assert_equal("zźżžƶẑẕ", tolower("zźżžƶẑẕ")) + + " According to https://twitter.com/jifa/status/625776454479970304 + " Ⱥ (U+023A) and Ⱦ (U+023E) are the *only* code points to increase + " in length (2 to 3 bytes) when lowercased. So let's test them. + call assert_equal("ⱥ ⱦ", tolower("Ⱥ Ⱦ")) +endfunc + +func Test_toupper() + call assert_equal("", toupper("")) + + " Test with all printable ASCII characters. + call assert_equal(' !"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~', + \ toupper(' !"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~')) + + if !has('multi_byte') + return + endif + + " Test with a few lowercase diacritics. + call assert_equal("AÀÁÂÃÄÅĀĂĄǍǞǠẢ", toupper("aàáâãäåāăąǎǟǡả")) + call assert_equal("BḂḆ", toupper("bḃḇ")) + call assert_equal("CÇĆĈĊČ", toupper("cçćĉċč")) + call assert_equal("DĎĐḊḎḐ", toupper("dďđḋḏḑ")) + call assert_equal("EÈÉÊËĒĔĖĘĚẺẼ", toupper("eèéêëēĕėęěẻẽ")) + call assert_equal("FḞ", toupper("fḟ")) + call assert_equal("GĜĞĠĢǤǦǴḠ", toupper("gĝğġģǥǧǵḡ")) + call assert_equal("HĤĦḢḦḨẖ", toupper("hĥħḣḧḩẖ")) + call assert_equal("IÌÍÎÏĨĪĬĮǏỈ", toupper("iìíîïĩīĭįǐỉ")) + call assert_equal("JĴǰ", toupper("jĵǰ")) + call assert_equal("KĶǨḰḴ", toupper("kķǩḱḵ")) + call assert_equal("LĹĻĽĿŁḺ", toupper("lĺļľŀłḻ")) + call assert_equal("MḾṀ ", toupper("mḿṁ ")) + call assert_equal("NÑŃŅŇʼnṄṈ", toupper("nñńņňʼnṅṉ")) + call assert_equal("OÒÓÔÕÖØŌŎŐƠǑǪǬỎ", toupper("oòóôõöøōŏőơǒǫǭỏ")) + call assert_equal("PṔṖ", toupper("pṕṗ")) + call assert_equal("Q", toupper("q")) + call assert_equal("RŔŖŘṘṞ", toupper("rŕŗřṙṟ")) + call assert_equal("SŚŜŞŠṠ", toupper("sśŝşšṡ")) + call assert_equal("TŢŤŦṪṮẗ", toupper("tţťŧṫṯẗ")) + call assert_equal("UÙÚÛÜŨŪŬŮŰŲƯǓỦ", toupper("uùúûüũūŭůűųưǔủ")) + call assert_equal("VṼ", toupper("vṽ")) + call assert_equal("WŴẀẂẄẆẘ", toupper("wŵẁẃẅẇẘ")) + call assert_equal("ẊẌ", toupper("ẋẍ")) + call assert_equal("YÝŸŶẎẙỲỶỸ", toupper("yýÿŷẏẙỳỷỹ")) + call assert_equal("ZŹŻŽƵẐẔ", toupper("zźżžƶẑẕ")) + + " Test that uppercase diacritics, which should remain unchanged. + call assert_equal("AÀÁÂÃÄÅĀĂĄǍǞǠẢ", toupper("AÀÁÂÃÄÅĀĂĄǍǞǠẢ")) + call assert_equal("BḂḆ", toupper("BḂḆ")) + call assert_equal("CÇĆĈĊČ", toupper("CÇĆĈĊČ")) + call assert_equal("DĎĐḊḎḐ", toupper("DĎĐḊḎḐ")) + call assert_equal("EÈÉÊËĒĔĖĘĚẺẼ", toupper("EÈÉÊËĒĔĖĘĚẺẼ")) + call assert_equal("FḞ ", toupper("FḞ ")) + call assert_equal("GĜĞĠĢǤǦǴḠ", toupper("GĜĞĠĢǤǦǴḠ")) + call assert_equal("HĤĦḢḦḨ", toupper("HĤĦḢḦḨ")) + call assert_equal("IÌÍÎÏĨĪĬĮİǏỈ", toupper("IÌÍÎÏĨĪĬĮİǏỈ")) + call assert_equal("JĴ", toupper("JĴ")) + call assert_equal("KĶǨḰḴ", toupper("KĶǨḰḴ")) + call assert_equal("LĹĻĽĿŁḺ", toupper("LĹĻĽĿŁḺ")) + call assert_equal("MḾṀ", toupper("MḾṀ")) + call assert_equal("NÑŃŅŇṄṈ", toupper("NÑŃŅŇṄṈ")) + call assert_equal("OÒÓÔÕÖØŌŎŐƠǑǪǬỎ", toupper("OÒÓÔÕÖØŌŎŐƠǑǪǬỎ")) + call assert_equal("PṔṖ", toupper("PṔṖ")) + call assert_equal("Q", toupper("Q")) + call assert_equal("RŔŖŘṘṞ", toupper("RŔŖŘṘṞ")) + call assert_equal("SŚŜŞŠṠ", toupper("SŚŜŞŠṠ")) + call assert_equal("TŢŤŦṪṮ", toupper("TŢŤŦṪṮ")) + call assert_equal("UÙÚÛÜŨŪŬŮŰŲƯǓỦ", toupper("UÙÚÛÜŨŪŬŮŰŲƯǓỦ")) + call assert_equal("VṼ", toupper("VṼ")) + call assert_equal("WŴẀẂẄẆ", toupper("WŴẀẂẄẆ")) + call assert_equal("XẊẌ", toupper("XẊẌ")) + call assert_equal("YÝŶŸẎỲỶỸ", toupper("YÝŶŸẎỲỶỸ")) + call assert_equal("ZŹŻŽƵẐẔ", toupper("ZŹŻŽƵẐẔ")) + + call assert_equal("ⱥ ⱦ", tolower("Ⱥ Ⱦ")) +endfunc + + diff --git a/src/nvim/testdir/test_normal.vim b/src/nvim/testdir/test_normal.vim index a22dca35cc..c529971528 100644 --- a/src/nvim/testdir/test_normal.vim +++ b/src/nvim/testdir/test_normal.vim @@ -1606,6 +1606,40 @@ fun! Test_normal30_changecase() norm! V~ call assert_equal('THIS IS A simple test: äüöss', getline('.')) + " Turkish ASCII turns to multi-byte. On Mac the Turkish locale is available + " but toupper()/tolower() don't do the right thing. + if !has('mac') && !has('osx') + try + lang tr_TR.UTF-8 + set casemap= + call setline(1, 'iI') + 1normal gUU + call assert_equal("\u0130I", getline(1)) + call assert_equal("\u0130I", toupper("iI")) + + call setline(1, 'iI') + 1normal guu + call assert_equal("i\u0131", getline(1)) + call assert_equal("i\u0131", tolower("iI")) + + set casemap& + call setline(1, 'iI') + 1normal gUU + call assert_equal("II", getline(1)) + call assert_equal("II", toupper("iI")) + + call setline(1, 'iI') + 1normal guu + call assert_equal("ii", getline(1)) + call assert_equal("ii", tolower("iI")) + + lang en_US.UTF-8 + catch /E197:/ + " can't use Turkish locale + throw 'Skipped: Turkish locale not available' + endtry + endif + " clean up bw! endfunc diff --git a/test/functional/ex_cmds/syntax_spec.lua b/test/functional/ex_cmds/syntax_spec.lua new file mode 100644 index 0000000000..c9e96703de --- /dev/null +++ b/test/functional/ex_cmds/syntax_spec.lua @@ -0,0 +1,17 @@ +local helpers = require('test.functional.helpers')(after_each) + +local eq = helpers.eq +local clear = helpers.clear +local exc_exec = helpers.exc_exec + +describe(':syntax', function() + before_each(clear) + + describe('keyword', function() + it('does not crash when group name contains unprintable characters', + function() + eq('Vim(syntax):E669: Unprintable character in group name', + exc_exec('syntax keyword \024 foo bar')) + end) + end) +end) diff --git a/test/functional/normal/lang_spec.lua b/test/functional/normal/lang_spec.lua new file mode 100644 index 0000000000..464b85d684 --- /dev/null +++ b/test/functional/normal/lang_spec.lua @@ -0,0 +1,59 @@ +local helpers = require('test.functional.helpers')(after_each) +local clear, insert, eq = helpers.clear, helpers.insert, helpers.eq +local execute, expect = helpers.execute, helpers.expect +local feed, eval = helpers.feed, helpers.eval +local exc_exec = helpers.exc_exec + +describe('gu and gU', function() + before_each(clear) + + it('works in any locale with default casemap', function() + eq('internal,keepascii', eval('&casemap')) + insert("iI") + feed("VgU") + expect("II") + feed("Vgu") + expect("ii") + end) + + describe('works in Turkish locale', function() + if helpers.pending_win32(pending) then return end + + clear() + if eval('has("mac")') ~= 0 then + pending("not yet on macOS", function() end) + return + end + + local err = exc_exec('lang ctype tr_TR.UTF-8') + if err ~= 0 then + pending("Locale tr_TR.UTF-8 not supported", function() end) + return + end + + before_each(function() + execute('lang ctype tr_TR.UTF-8') + end) + + it('with default casemap', function() + eq('internal,keepascii', eval('&casemap')) + -- expect ASCII behavior + insert("iI") + feed("VgU") + expect("II") + feed("Vgu") + expect("ii") + end) + + it('with casemap=""', function() + execute('set casemap=') + -- expect Turkish locale behavior + insert("iI") + feed("VgU") + expect("İI") + feed("Vgu") + expect("iı") + end) + + end) +end) diff --git a/test/functional/spell/spellfile_spec.lua b/test/functional/spell/spellfile_spec.lua new file mode 100644 index 0000000000..e7cd10d2ac --- /dev/null +++ b/test/functional/spell/spellfile_spec.lua @@ -0,0 +1,108 @@ +local helpers = require('test.functional.helpers')(after_each) +local lfs = require('lfs') + +local eq = helpers.eq +local clear = helpers.clear +local meths = helpers.meths +local exc_exec = helpers.exc_exec +local write_file = helpers.write_file + +local testdir = 'Xtest-functional-spell-spellfile.d' + +describe('spellfile', function() + before_each(function() + clear() + lfs.mkdir(testdir) + lfs.mkdir(testdir .. '/spell') + end) + after_each(function() + lfs.rmdir(testdir) + end) + -- ┌ Magic string (#VIMSPELLMAGIC) + -- │ ┌ Spell file version (#VIMSPELLVERSION) + local spellheader = 'VIMspell\050' + it('errors out when prefcond section is truncated', function() + meths.set_option('runtimepath', testdir) + write_file(testdir .. '/spell/en.ascii.spl', + -- ┌ Section identifier (#SN_PREFCOND) + -- │ ┌ Section flags (#SNF_REQUIRED or zero) + -- │ │ ┌ Section length (4 bytes, MSB first) + spellheader .. '\003\001\000\000\000\003' + -- ┌ Number of regexes in section (2 bytes, MSB first) + -- │ ┌ Condition length (1 byte) + -- │ │ ┌ Condition regex (missing!) + .. '\000\001\001') + meths.set_option('spelllang', 'en') + eq('Vim(set):E758: Truncated spell file', + exc_exec('set spell')) + end) + it('errors out when prefcond regexp contains NUL byte', function() + meths.set_option('runtimepath', testdir) + write_file(testdir .. '/spell/en.ascii.spl', + -- ┌ Section identifier (#SN_PREFCOND) + -- │ ┌ Section flags (#SNF_REQUIRED or zero) + -- │ │ ┌ Section length (4 bytes, MSB first) + spellheader .. '\003\001\000\000\000\008' + -- ┌ Number of regexes in section (2 bytes, MSB first) + -- │ ┌ Condition length (1 byte) + -- │ │ ┌ Condition regex + -- │ │ │ ┌ End of sections marker + .. '\000\001\005ab\000cd\255' + -- ┌ LWORDTREE tree length (4 bytes) + -- │ ┌ KWORDTREE tree length (4 bytes) + -- │ │ ┌ PREFIXTREE tree length + .. '\000\000\000\000\000\000\000\000\000\000\000\000') + meths.set_option('spelllang', 'en') + eq('Vim(set):E759: Format error in spell file', + exc_exec('set spell')) + end) + it('errors out when region contains NUL byte', function() + meths.set_option('runtimepath', testdir) + write_file(testdir .. '/spell/en.ascii.spl', + -- ┌ Section identifier (#SN_REGION) + -- │ ┌ Section flags (#SNF_REQUIRED or zero) + -- │ │ ┌ Section length (4 bytes, MSB first) + spellheader .. '\000\001\000\000\000\008' + -- ┌ Regions ┌ End of sections marker + .. '01234\00067\255' + -- ┌ LWORDTREE tree length (4 bytes) + -- │ ┌ KWORDTREE tree length (4 bytes) + -- │ │ ┌ PREFIXTREE tree length + .. '\000\000\000\000\000\000\000\000\000\000\000\000') + meths.set_option('spelllang', 'en') + eq('Vim(set):E759: Format error in spell file', + exc_exec('set spell')) + end) + it('errors out when SAL section contains NUL byte', function() + meths.set_option('runtimepath', testdir) + write_file(testdir .. '/spell/en.ascii.spl', + -- ┌ Section identifier (#SN_SAL) + -- │ ┌ Section flags (#SNF_REQUIRED or zero) + -- │ │ ┌ Section length (4 bytes, MSB first) + spellheader .. '\005\001\000\000\000\008' + -- ┌ salflags + -- │ ┌ salcount (2 bytes, MSB first) + -- │ │ ┌ salfromlen (1 byte) + -- │ │ │ ┌ Special character + -- │ │ │ │┌ salfrom (should not contain NUL) + -- │ │ │ ││ ┌ saltolen + -- │ │ │ ││ │ ┌ salto + -- │ │ │ ││ │ │┌ End of sections marker + .. '\000\000\001\0024\000\0017\255' + -- ┌ LWORDTREE tree length (4 bytes) + -- │ ┌ KWORDTREE tree length (4 bytes) + -- │ │ ┌ PREFIXTREE tree length + .. '\000\000\000\000\000\000\000\000\000\000\000\000') + meths.set_option('spelllang', 'en') + eq('Vim(set):E759: Format error in spell file', + exc_exec('set spell')) + end) + it('errors out when spell header contains NUL bytes', function() + meths.set_option('runtimepath', testdir) + write_file(testdir .. '/spell/en.ascii.spl', + spellheader:sub(1, -3) .. '\000\000') + meths.set_option('spelllang', 'en') + eq('Vim(set):E757: This does not look like a spell file', + exc_exec('set spell')) + end) +end) |