aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/nvim/CMakeLists.txt1
-rw-r--r--src/nvim/charset.c69
-rw-r--r--src/nvim/edit.c25
-rw-r--r--src/nvim/eval.c28
-rw-r--r--src/nvim/ex_getln.c4
-rw-r--r--src/nvim/file_search.c2
-rw-r--r--src/nvim/macros.h2
-rw-r--r--src/nvim/mbyte.c31
-rw-r--r--src/nvim/message.c6
-rw-r--r--src/nvim/ops.c27
-rw-r--r--src/nvim/path.c6
-rw-r--r--src/nvim/regexp.c45
-rw-r--r--src/nvim/regexp_nfa.c32
-rw-r--r--src/nvim/search.c25
-rw-r--r--src/nvim/spell.c44
-rw-r--r--src/nvim/spell_defs.h4
-rw-r--r--src/nvim/spellfile.c146
-rw-r--r--src/nvim/strings.c51
-rw-r--r--src/nvim/syntax.c124
-rw-r--r--src/nvim/testdir/test_functions.vim144
-rw-r--r--src/nvim/testdir/test_normal.vim34
-rw-r--r--test/functional/ex_cmds/syntax_spec.lua17
-rw-r--r--test/functional/normal/lang_spec.lua59
-rw-r--r--test/functional/spell/spellfile_spec.lua108
24 files changed, 685 insertions, 349 deletions
diff --git a/src/nvim/CMakeLists.txt b/src/nvim/CMakeLists.txt
index cb003c2026..a91657f1bd 100644
--- a/src/nvim/CMakeLists.txt
+++ b/src/nvim/CMakeLists.txt
@@ -501,6 +501,7 @@ add_custom_command(
${CMAKE_COMMAND}
-DTARGET=${LINT_SUPPRESSES_ROOT}
-P ${LINT_SUPPRESSES_INSTALL_SCRIPT}
+ COMMAND ${CMAKE_COMMAND} -E touch ${LINT_SUPPRESSES_TOUCH_FILE}
DEPENDS
${LINT_SUPPRESSES_ARCHIVE} ${LINT_SUPPRESSES_INSTALL_SCRIPT}
)
diff --git a/src/nvim/charset.c b/src/nvim/charset.c
index 99d3e2dd88..645139f696 100644
--- a/src/nvim/charset.c
+++ b/src/nvim/charset.c
@@ -212,8 +212,8 @@ int buf_init_chartab(buf_T *buf, int global)
// work properly when 'encoding' is "latin1" and the locale is
// "C".
if (!do_isalpha
- || vim_islower(c)
- || vim_isupper(c)
+ || mb_islower(c)
+ || mb_isupper(c)
|| (p_altkeymap && (F_isalpha(c) || F_isdigit(c)))) {
if (i == 0) {
// (re)set ID flag
@@ -417,11 +417,11 @@ char_u* str_foldcase(char_u *str, int orglen, char_u *buf, int buflen)
while (STR_CHAR(i) != NUL) {
int c = utf_ptr2char(STR_PTR(i));
int olen = utf_ptr2len(STR_PTR(i));
- int lc = utf_tolower(c);
+ int lc = mb_tolower(c);
// Only replace the character when it is not an invalid
// sequence (ASCII character or more than one byte) and
- // utf_tolower() doesn't return the original character.
+ // mb_tolower() doesn't return the original character.
if (((c < 0x80) || (olen > 1)) && (c != lc)) {
int nlen = utf_char2len(lc);
@@ -1506,67 +1506,6 @@ char_u* skiptohex(char_u *q)
return p;
}
-// Vim's own character class functions. These exist because many library
-// islower()/toupper() etc. do not work properly: they crash when used with
-// invalid values or can't handle latin1 when the locale is C.
-// Speed is most important here.
-
-/// Check that the character is lower-case
-///
-/// @param c character to check
-bool vim_islower(int c)
- FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT
-{
- if (c <= '@') {
- return false;
- }
-
- if (c >= 0x80) {
- return utf_islower(c);
- }
- return islower(c);
-}
-
-/// Check that the character is upper-case
-///
-/// @param c character to check
-bool vim_isupper(int c)
- FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT
-{
- if (c <= '@') {
- return false;
- }
-
- if (c >= 0x80) {
- return utf_isupper(c);
- }
- return isupper(c);
-}
-
-int vim_toupper(int c)
-{
- if (c <= '@') {
- return c;
- }
-
- if (c >= 0x80) {
- return utf_toupper(c);
- }
- return TOUPPER_LOC(c);
-}
-
-int vim_tolower(int c)
-{
- if (c <= '@') {
- return c;
- }
-
- if (c >= 0x80) {
- return utf_tolower(c);
- }
- return TOLOWER_LOC(c);
-}
-
/// Skip over text until ' ' or '\t' or NUL
///
/// @param[in] p Text to skip over.
diff --git a/src/nvim/edit.c b/src/nvim/edit.c
index b35504908e..fe00027dec 100644
--- a/src/nvim/edit.c
+++ b/src/nvim/edit.c
@@ -2037,12 +2037,12 @@ int ins_compl_add_infercase(char_u *str, int len, int icase, char_u *fname, int
} else {
c = *(p++);
}
- if (vim_islower(c)) {
+ if (mb_islower(c)) {
has_lower = true;
- if (vim_isupper(wca[i])) {
+ if (mb_isupper(wca[i])) {
// Rule 1 is satisfied.
for (i = actual_compl_length; i < actual_len; i++) {
- wca[i] = vim_tolower(wca[i]);
+ wca[i] = mb_tolower(wca[i]);
}
break;
}
@@ -2062,14 +2062,14 @@ int ins_compl_add_infercase(char_u *str, int len, int icase, char_u *fname, int
} else {
c = *(p++);
}
- if (was_letter && vim_isupper(c) && vim_islower(wca[i])) {
+ if (was_letter && mb_isupper(c) && mb_islower(wca[i])) {
// Rule 2 is satisfied.
for (i = actual_compl_length; i < actual_len; i++) {
- wca[i] = vim_toupper(wca[i]);
+ wca[i] = mb_toupper(wca[i]);
}
break;
}
- was_letter = vim_islower(c) || vim_isupper(c);
+ was_letter = mb_islower(c) || mb_isupper(c);
}
}
@@ -2082,10 +2082,10 @@ int ins_compl_add_infercase(char_u *str, int len, int icase, char_u *fname, int
} else {
c = *(p++);
}
- if (vim_islower(c)) {
- wca[i] = vim_tolower(wca[i]);
- } else if (vim_isupper(c)) {
- wca[i] = vim_toupper(wca[i]);
+ if (mb_islower(c)) {
+ wca[i] = mb_tolower(wca[i]);
+ } else if (mb_isupper(c)) {
+ wca[i] = mb_toupper(wca[i]);
}
}
}
@@ -2302,9 +2302,10 @@ static void ins_compl_longest_match(compl_T *match)
c1 = *p;
c2 = *s;
}
- if (match->cp_icase ? (vim_tolower(c1) != vim_tolower(c2))
- : (c1 != c2))
+ if (match->cp_icase ? (mb_tolower(c1) != mb_tolower(c2))
+ : (c1 != c2)) {
break;
+ }
if (has_mbyte) {
mb_ptr_adv(p);
mb_ptr_adv(s);
diff --git a/src/nvim/eval.c b/src/nvim/eval.c
index 124d6acfe9..0663e19b9a 100644
--- a/src/nvim/eval.c
+++ b/src/nvim/eval.c
@@ -16791,30 +16791,9 @@ void timer_teardown(void)
*/
static void f_tolower(typval_T *argvars, typval_T *rettv, FunPtr fptr)
{
- char_u *p = (char_u *)xstrdup(tv_get_string(&argvars[0]));
rettv->v_type = VAR_STRING;
- rettv->vval.v_string = p;
-
- while (*p != NUL) {
- int l;
-
- if (enc_utf8) {
- int c, lc;
-
- c = utf_ptr2char(p);
- lc = utf_tolower(c);
- l = utf_ptr2len(p);
- /* TODO: reallocate string when byte count changes. */
- if (utf_char2len(lc) == l)
- utf_char2bytes(lc, p);
- p += l;
- } else if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
- p += l; /* skip multi-byte character */
- else {
- *p = TOLOWER_LOC(*p); /* note that tolower() can be a macro */
- ++p;
- }
- }
+ rettv->vval.v_string = (char_u *)strcase_save(tv_get_string(&argvars[0]),
+ false);
}
/*
@@ -16823,7 +16802,8 @@ static void f_tolower(typval_T *argvars, typval_T *rettv, FunPtr fptr)
static void f_toupper(typval_T *argvars, typval_T *rettv, FunPtr fptr)
{
rettv->v_type = VAR_STRING;
- rettv->vval.v_string = (char_u *)strup_save(tv_get_string(&argvars[0]));
+ rettv->vval.v_string = (char_u *)strcase_save(tv_get_string(&argvars[0]),
+ true);
}
/*
diff --git a/src/nvim/ex_getln.c b/src/nvim/ex_getln.c
index 8810204c03..9d74f554ba 100644
--- a/src/nvim/ex_getln.c
+++ b/src/nvim/ex_getln.c
@@ -1231,7 +1231,7 @@ static int command_line_handle_key(CommandLineState *s)
// command line has no uppercase characters, convert
// the character to lowercase
if (p_ic && p_scs && !pat_has_uppercase(ccline.cmdbuff)) {
- s->c = vim_tolower(s->c);
+ s->c = mb_tolower(s->c);
}
if (s->c != NUL) {
@@ -3018,7 +3018,7 @@ ExpandOne (
|| xp->xp_context == EXPAND_FILES
|| xp->xp_context == EXPAND_SHELLCMD
|| xp->xp_context == EXPAND_BUFFERS)) {
- if (vim_tolower(c0) != vim_tolower(ci)) {
+ if (mb_tolower(c0) != mb_tolower(ci)) {
break;
}
} else if (c0 != ci) {
diff --git a/src/nvim/file_search.c b/src/nvim/file_search.c
index 9592235905..db745bdd15 100644
--- a/src/nvim/file_search.c
+++ b/src/nvim/file_search.c
@@ -1057,7 +1057,7 @@ static bool ff_wc_equal(char_u *s1, char_u *s2)
c1 = PTR2CHAR(s1 + i);
c2 = PTR2CHAR(s2 + j);
- if ((p_fic ? vim_tolower(c1) != vim_tolower(c2) : c1 != c2)
+ if ((p_fic ? mb_tolower(c1) != mb_tolower(c2) : c1 != c2)
&& (prev1 != '*' || prev2 != '*')) {
return false;
}
diff --git a/src/nvim/macros.h b/src/nvim/macros.h
index a8df6322cf..22fd48de9d 100644
--- a/src/nvim/macros.h
+++ b/src/nvim/macros.h
@@ -62,7 +62,7 @@
* toupper() and tolower() that use the current locale.
* Careful: Only call TOUPPER_LOC() and TOLOWER_LOC() with a character in the
* range 0 - 255. toupper()/tolower() on some systems can't handle others.
- * Note: It is often better to use vim_tolower() and vim_toupper(), because many
+ * Note: It is often better to use mb_tolower() and mb_toupper(), because many
* toupper() and tolower() implementations only work for ASCII.
*/
#define TOUPPER_LOC toupper
diff --git a/src/nvim/mbyte.c b/src/nvim/mbyte.c
index 460528b85f..b18459a2b5 100644
--- a/src/nvim/mbyte.c
+++ b/src/nvim/mbyte.c
@@ -1174,11 +1174,14 @@ int utf_fold(int a)
return utf_convert(a, foldCase, ARRAY_SIZE(foldCase));
}
-/*
- * Return the upper-case equivalent of "a", which is a UCS-4 character. Use
- * simple case folding.
- */
-int utf_toupper(int a)
+// Vim's own character class functions. These exist because many library
+// islower()/toupper() etc. do not work properly: they crash when used with
+// invalid values or can't handle latin1 when the locale is C.
+// Speed is most important here.
+
+/// Return the upper-case equivalent of "a", which is a UCS-4 character. Use
+/// simple case folding.
+int mb_toupper(int a)
{
/* If 'casemap' contains "keepascii" use ASCII style toupper(). */
if (a < 128 && (cmp_flags & CMP_KEEPASCII))
@@ -1198,17 +1201,15 @@ int utf_toupper(int a)
return utf_convert(a, toUpper, ARRAY_SIZE(toUpper));
}
-bool utf_islower(int a)
+/// Check whether "a" is a lower-case character
+///
+/// A character counts as lower-case when mb_toupper() maps it to a different
+/// character, or when it is 0xdf (German sharp s).
+///
+/// @param a  Character to check.
+///
+/// @return true if "a" is considered lower-case, false otherwise.
+bool mb_islower(int a)
{
- /* German sharp s is lower case but has no upper case equivalent. */
- return (utf_toupper(a) != a) || a == 0xdf;
+ // German sharp s is lower case but has no upper case equivalent.
+ return (mb_toupper(a) != a) || a == 0xdf;
}
-/*
- * Return the lower-case equivalent of "a", which is a UCS-4 character. Use
- * simple case folding.
- */
-int utf_tolower(int a)
+/// Return the lower-case equivalent of "a", which is a UCS-4 character. Use
+/// simple case folding.
+int mb_tolower(int a)
{
/* If 'casemap' contains "keepascii" use ASCII style tolower(). */
if (a < 128 && (cmp_flags & CMP_KEEPASCII))
@@ -1228,9 +1229,9 @@ int utf_tolower(int a)
return utf_convert(a, toLower, ARRAY_SIZE(toLower));
}
-bool utf_isupper(int a)
+/// Check whether "a" is an upper-case character
+///
+/// A character counts as upper-case when mb_tolower() maps it to a different
+/// character.
+///
+/// @param a  Character to check.
+///
+/// @return true if "a" is considered upper-case, false otherwise.
+bool mb_isupper(int a)
{
- return utf_tolower(a) != a;
+ return mb_tolower(a) != a;
}
static int utf_strnicmp(const char_u *s1, const char_u *s2, size_t n1,
diff --git a/src/nvim/message.c b/src/nvim/message.c
index 1d3609291a..3e4a1e10b6 100644
--- a/src/nvim/message.c
+++ b/src/nvim/message.c
@@ -2730,8 +2730,8 @@ do_dialog (
break;
}
- /* Make the character lowercase, as chars in "hotkeys" are. */
- c = vim_tolower(c);
+ // Make the character lowercase, as chars in "hotkeys" are.
+ c = mb_tolower(c);
retval = 1;
for (i = 0; hotkeys[i]; ++i) {
if (has_mbyte) {
@@ -2777,7 +2777,7 @@ copy_char (
if (has_mbyte) {
if (lowercase) {
- c = vim_tolower((*mb_ptr2char)(from));
+ c = mb_tolower((*mb_ptr2char)(from));
return (*mb_char2bytes)(c, to);
} else {
len = (*mb_ptr2len)(from);
diff --git a/src/nvim/ops.c b/src/nvim/ops.c
index 68ef27222c..f11d6b69b2 100644
--- a/src/nvim/ops.c
+++ b/src/nvim/ops.c
@@ -1956,16 +1956,18 @@ int swapchar(int op_type, pos_T *pos)
if (enc_dbcs != 0 && c >= 0x100) /* No lower/uppercase letter */
return FALSE;
nc = c;
- if (vim_islower(c)) {
- if (op_type == OP_ROT13)
+ if (mb_islower(c)) {
+ if (op_type == OP_ROT13) {
nc = ROT13(c, 'a');
- else if (op_type != OP_LOWER)
- nc = vim_toupper(c);
- } else if (vim_isupper(c)) {
- if (op_type == OP_ROT13)
+ } else if (op_type != OP_LOWER) {
+ nc = mb_toupper(c);
+ }
+ } else if (mb_isupper(c)) {
+ if (op_type == OP_ROT13) {
nc = ROT13(c, 'A');
- else if (op_type != OP_UPPER)
- nc = vim_tolower(c);
+ } else if (op_type != OP_UPPER) {
+ nc = mb_tolower(c);
+ }
}
if (nc != c) {
if (enc_utf8 && (c >= 0x80 || nc >= 0x80)) {
@@ -3327,10 +3329,11 @@ void ex_display(exarg_T *eap)
get_clipboard(name, &yb, true);
- if (name == vim_tolower(redir_reg)
- || (redir_reg == '"' && yb == y_previous))
- continue; /* do not list register being written to, the
- * pointer can be freed */
+ if (name == mb_tolower(redir_reg)
+ || (redir_reg == '"' && yb == y_previous)) {
+ continue; // do not list register being written to, the
+ // pointer can be freed
+ }
if (yb->y_array != NULL) {
msg_putchar('\n');
diff --git a/src/nvim/path.c b/src/nvim/path.c
index 6bf42ed2fa..205fc2ed62 100644
--- a/src/nvim/path.c
+++ b/src/nvim/path.c
@@ -1853,7 +1853,7 @@ int pathcmp(const char *p, const char *q, int maxlen)
break;
}
- if ((p_fic ? vim_toupper(c1) != vim_toupper(c2) : c1 != c2)
+ if ((p_fic ? mb_toupper(c1) != mb_toupper(c2) : c1 != c2)
#ifdef BACKSLASH_IN_FILENAME
/* consider '/' and '\\' to be equal */
&& !((c1 == '/' && c2 == '\\')
@@ -1864,8 +1864,8 @@ int pathcmp(const char *p, const char *q, int maxlen)
return -1;
if (vim_ispathsep(c2))
return 1;
- return p_fic ? vim_toupper(c1) - vim_toupper(c2)
- : c1 - c2; /* no match */
+ return p_fic ? mb_toupper(c1) - mb_toupper(c2)
+ : c1 - c2; // no match
}
i += MB_PTR2LEN((char_u *)p + i);
diff --git a/src/nvim/regexp.c b/src/nvim/regexp.c
index 9baa53d2a2..4b5e17b00b 100644
--- a/src/nvim/regexp.c
+++ b/src/nvim/regexp.c
@@ -2350,7 +2350,7 @@ collection:
break;
case CLASS_LOWER:
for (cu = 1; cu <= 255; cu++) {
- if (vim_islower(cu) && cu != 170 && cu != 186) {
+ if (mb_islower(cu) && cu != 170 && cu != 186) {
regmbc(cu);
}
}
@@ -2376,7 +2376,7 @@ collection:
break;
case CLASS_UPPER:
for (cu = 1; cu <= 255; cu++) {
- if (vim_isupper(cu)) {
+ if (mb_isupper(cu)) {
regmbc(cu);
}
}
@@ -3474,7 +3474,7 @@ static long bt_regexec_both(char_u *line,
|| (ireg_ic
&& (((enc_utf8 && utf_fold(prog->regstart) == utf_fold(c)))
|| (c < 255 && prog->regstart < 255
- && vim_tolower(prog->regstart) == vim_tolower(c))))) {
+ && mb_tolower(prog->regstart) == mb_tolower(c))))) {
retval = regtry(prog, col);
} else {
retval = 0;
@@ -4155,7 +4155,7 @@ regmatch (
if (*opnd != *reginput
&& (!ireg_ic
|| (!enc_utf8
- && vim_tolower(*opnd) != vim_tolower(*reginput)))) {
+ && mb_tolower(*opnd) != mb_tolower(*reginput)))) {
status = RA_NOMATCH;
} else if (*opnd == NUL) {
// match empty string always works; happens when "~" is
@@ -4573,12 +4573,14 @@ regmatch (
if (OP(next) == EXACTLY) {
rst.nextb = *OPERAND(next);
if (ireg_ic) {
- if (vim_isupper(rst.nextb))
- rst.nextb_ic = vim_tolower(rst.nextb);
- else
- rst.nextb_ic = vim_toupper(rst.nextb);
- } else
+ if (mb_isupper(rst.nextb)) {
+ rst.nextb_ic = mb_tolower(rst.nextb);
+ } else {
+ rst.nextb_ic = mb_toupper(rst.nextb);
+ }
+ } else {
rst.nextb_ic = rst.nextb;
+ }
} else {
rst.nextb = NUL;
rst.nextb_ic = NUL;
@@ -5339,8 +5341,8 @@ do_class:
* would have been used for it. It does handle single-byte
* characters, such as latin1. */
if (ireg_ic) {
- cu = vim_toupper(*opnd);
- cl = vim_tolower(*opnd);
+ cu = mb_toupper(*opnd);
+ cl = mb_tolower(*opnd);
while (count < maxcount && (*scan == cu || *scan == cl)) {
count++;
scan++;
@@ -6312,14 +6314,15 @@ static char_u *cstrchr(char_u *s, int c)
/* tolower() and toupper() can be slow, comparing twice should be a lot
* faster (esp. when using MS Visual C++!).
* For UTF-8 need to use folded case. */
- if (enc_utf8 && c > 0x80)
+ if (c > 0x80) {
cc = utf_fold(c);
- else if (vim_isupper(c))
- cc = vim_tolower(c);
- else if (vim_islower(c))
- cc = vim_toupper(c);
- else
+ } else if (mb_isupper(c)) {
+ cc = mb_tolower(c);
+ } else if (mb_islower(c)) {
+ cc = mb_toupper(c);
+ } else {
return vim_strchr(s, c);
+ }
if (has_mbyte) {
for (p = s; *p != NUL; p += (*mb_ptr2len)(p)) {
@@ -6348,28 +6351,28 @@ static char_u *cstrchr(char_u *s, int c)
static fptr_T do_upper(int *d, int c)
{
- *d = vim_toupper(c);
+ *d = mb_toupper(c);
return (fptr_T)NULL;
}
static fptr_T do_Upper(int *d, int c)
{
- *d = vim_toupper(c);
+ *d = mb_toupper(c);
return (fptr_T)do_Upper;
}
static fptr_T do_lower(int *d, int c)
{
- *d = vim_tolower(c);
+ *d = mb_tolower(c);
return (fptr_T)NULL;
}
static fptr_T do_Lower(int *d, int c)
{
- *d = vim_tolower(c);
+ *d = mb_tolower(c);
return (fptr_T)do_Lower;
}
diff --git a/src/nvim/regexp_nfa.c b/src/nvim/regexp_nfa.c
index 5b49ab38f0..caf26fdd35 100644
--- a/src/nvim/regexp_nfa.c
+++ b/src/nvim/regexp_nfa.c
@@ -4373,7 +4373,7 @@ static int check_char_class(int class, int c)
return OK;
break;
case NFA_CLASS_LOWER:
- if (vim_islower(c) && c != 170 && c != 186) {
+ if (mb_islower(c) && c != 170 && c != 186) {
return OK;
}
break;
@@ -4391,8 +4391,9 @@ static int check_char_class(int class, int c)
return OK;
break;
case NFA_CLASS_UPPER:
- if (vim_isupper(c))
+ if (mb_isupper(c)) {
return OK;
+ }
break;
case NFA_CLASS_XDIGIT:
if (ascii_isxdigit(c))
@@ -4892,7 +4893,7 @@ static long find_match_text(colnr_T startcol, int regstart, char_u *match_text)
int c2_len = PTR2LEN(s2);
int c2 = PTR2CHAR(s2);
- if ((c1 != c2 && (!ireg_ic || vim_tolower(c1) != vim_tolower(c2)))
+ if ((c1 != c2 && (!ireg_ic || mb_tolower(c1) != mb_tolower(c2)))
|| c1_len != c2_len) {
match = false;
break;
@@ -5585,22 +5586,24 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
break;
}
if (ireg_ic) {
- int curc_low = vim_tolower(curc);
- int done = FALSE;
+ int curc_low = mb_tolower(curc);
+ int done = false;
- for (; c1 <= c2; ++c1)
- if (vim_tolower(c1) == curc_low) {
+ for (; c1 <= c2; c1++) {
+ if (mb_tolower(c1) == curc_low) {
result = result_if_matched;
done = TRUE;
break;
}
- if (done)
+ }
+ if (done) {
break;
+ }
}
} else if (state->c < 0 ? check_char_class(state->c, curc)
: (curc == state->c
- || (ireg_ic && vim_tolower(curc)
- == vim_tolower(state->c)))) {
+ || (ireg_ic && mb_tolower(curc)
+ == mb_tolower(state->c)))) {
result = result_if_matched;
break;
}
@@ -6003,8 +6006,9 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
#endif
result = (c == curc);
- if (!result && ireg_ic)
- result = vim_tolower(c) == vim_tolower(curc);
+ if (!result && ireg_ic) {
+ result = mb_tolower(c) == mb_tolower(curc);
+ }
// If ireg_icombine is not set only skip over the character
// itself. When it is set skip over composing characters.
@@ -6152,8 +6156,8 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
// Checking if the required start character matches is
// cheaper than adding a state that won't match.
c = PTR2CHAR(reginput + clen);
- if (c != prog->regstart && (!ireg_ic || vim_tolower(c)
- != vim_tolower(prog->regstart))) {
+ if (c != prog->regstart && (!ireg_ic || mb_tolower(c)
+ != mb_tolower(prog->regstart))) {
#ifdef REGEXP_DEBUG
fprintf(log_fd,
" Skipping start state, regstart does not match\n");
diff --git a/src/nvim/search.c b/src/nvim/search.c
index c5c92b41c5..91a558045f 100644
--- a/src/nvim/search.c
+++ b/src/nvim/search.c
@@ -335,23 +335,26 @@ int pat_has_uppercase(char_u *pat)
while (*p != NUL) {
int l;
- if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1) {
- if (enc_utf8 && utf_isupper(utf_ptr2char(p)))
- return TRUE;
+ if ((l = mb_ptr2len(p)) > 1) {
+ if (mb_isupper(utf_ptr2char(p))) {
+ return true;
+ }
p += l;
} else if (*p == '\\') {
- if (p[1] == '_' && p[2] != NUL) /* skip "\_X" */
+ if (p[1] == '_' && p[2] != NUL) { // skip "\_X"
p += 3;
- else if (p[1] == '%' && p[2] != NUL) /* skip "\%X" */
+ } else if (p[1] == '%' && p[2] != NUL) { // skip "\%X"
p += 3;
- else if (p[1] != NUL) /* skip "\X" */
+ } else if (p[1] != NUL) { // skip "\X"
p += 2;
- else
+ } else {
p += 1;
- } else if (vim_isupper(*p))
- return TRUE;
- else
- ++p;
+ }
+ } else if (mb_isupper(*p)) {
+ return true;
+ } else {
+ p++;
+ }
}
return FALSE;
}
diff --git a/src/nvim/spell.c b/src/nvim/spell.c
index d4f49bffb2..18febda1d8 100644
--- a/src/nvim/spell.c
+++ b/src/nvim/spell.c
@@ -2526,8 +2526,7 @@ void clear_spell_chartab(spelltab_T *sp)
}
}
-// Init the chartab used for spelling. Only depends on 'encoding'.
-// Called once while starting up and when 'encoding' changes.
+// Init the chartab used for spelling. Called once while starting up.
// The default is to use isalpha(), but the spell file should define the word
// characters to make it possible that 'encoding' differs from the current
// locale. For utf-8 we don't use isalpha() but our own functions.
@@ -2537,36 +2536,17 @@ void init_spell_chartab(void)
did_set_spelltab = false;
clear_spell_chartab(&spelltab);
- if (enc_dbcs) {
- // DBCS: assume double-wide characters are word characters.
- for (i = 128; i <= 255; ++i)
- if (MB_BYTE2LEN(i) == 2)
- spelltab.st_isw[i] = true;
- } else if (enc_utf8) {
- for (i = 128; i < 256; ++i) {
- int f = utf_fold(i);
- int u = utf_toupper(i);
-
- spelltab.st_isu[i] = utf_isupper(i);
- spelltab.st_isw[i] = spelltab.st_isu[i] || utf_islower(i);
- // The folded/upper-cased value is different between latin1 and
- // utf8 for 0xb5, causing E763 for no good reason. Use the latin1
- // value for utf-8 to avoid this.
- spelltab.st_fold[i] = (f < 256) ? f : i;
- spelltab.st_upper[i] = (u < 256) ? u : i;
- }
- } else {
- // Rough guess: use locale-dependent library functions.
- for (i = 128; i < 256; ++i) {
- if (vim_isupper(i)) {
- spelltab.st_isw[i] = true;
- spelltab.st_isu[i] = true;
- spelltab.st_fold[i] = vim_tolower(i);
- } else if (vim_islower(i)) {
- spelltab.st_isw[i] = true;
- spelltab.st_upper[i] = vim_toupper(i);
- }
- }
+ for (i = 128; i < 256; i++) {
+ int f = utf_fold(i);
+ int u = mb_toupper(i);
+
+ spelltab.st_isu[i] = mb_isupper(i);
+ spelltab.st_isw[i] = spelltab.st_isu[i] || mb_islower(i);
+ // The folded/upper-cased value is different between latin1 and
+ // utf8 for 0xb5, causing E763 for no good reason. Use the latin1
+ // value for utf-8 to avoid this.
+ spelltab.st_fold[i] = (f < 256) ? f : i;
+ spelltab.st_upper[i] = (u < 256) ? u : i;
}
}
diff --git a/src/nvim/spell_defs.h b/src/nvim/spell_defs.h
index c54a7f5390..ddd54c724e 100644
--- a/src/nvim/spell_defs.h
+++ b/src/nvim/spell_defs.h
@@ -265,11 +265,11 @@ typedef struct trystate_S {
: (c) < \
256 ? (int)spelltab.st_fold[c] : (int)towlower(c))
-#define SPELL_TOUPPER(c) (enc_utf8 && (c) >= 128 ? utf_toupper(c) \
+#define SPELL_TOUPPER(c) (enc_utf8 && (c) >= 128 ? mb_toupper(c) \
: (c) < \
256 ? (int)spelltab.st_upper[c] : (int)towupper(c))
-#define SPELL_ISUPPER(c) (enc_utf8 && (c) >= 128 ? utf_isupper(c) \
+#define SPELL_ISUPPER(c) (enc_utf8 && (c) >= 128 ? mb_isupper(c) \
: (c) < 256 ? spelltab.st_isu[c] : iswupper(c))
// First language that is loaded, start of the linked list of loaded
diff --git a/src/nvim/spellfile.c b/src/nvim/spellfile.c
index bbef1f5032..1da71dc4f9 100644
--- a/src/nvim/spellfile.c
+++ b/src/nvim/spellfile.c
@@ -225,6 +225,7 @@
#include <stdio.h>
#include <stdint.h>
#include <wctype.h>
+#include <strings.h>
#include "nvim/vim.h"
#include "nvim/spell_defs.h"
@@ -267,7 +268,7 @@
#define SAL_REM_ACCENTS 4
#define VIMSPELLMAGIC "VIMspell" // string at start of Vim spell file
-#define VIMSPELLMAGICL 8
+#define VIMSPELLMAGICL (sizeof(VIMSPELLMAGIC) - 1)
#define VIMSPELLVERSION 50
// Section IDs. Only renumber them when VIMSPELLVERSION changes!
@@ -494,6 +495,64 @@ typedef struct spellinfo_S {
# include "spellfile.c.generated.h"
#endif
+/// Read n bytes from fd to buf, returning from the caller on errors
+///
+/// This is a statement macro: when fewer than n bytes could be read it
+/// executes `return` in the function that uses it. buf, n and fd are each
+/// evaluated once, into locals with an `__SPRB` suffix.
+///
+/// @param[out] buf Buffer to read to, must be at least n bytes long.
+/// @param[in] n Number of bytes to read.
+/// @param fd FILE* to read from.
+/// @param exit_code Statement to run just before returning on error, e.g.
+///                  cleanup. Pass `;` when there is nothing to do.
+///
+/// @return Nothing when all n bytes were read: execution continues after the
+///         macro. Otherwise makes the enclosing function return
+///         SP_TRUNCERROR if feof() reports end of file, or SP_OTHERERROR if
+///         it does not.
+#define SPELL_READ_BYTES(buf, n, fd, exit_code) \
+ do { \
+ const size_t n__SPRB = (n); \
+ FILE *const fd__SPRB = (fd); \
+ char *const buf__SPRB = (buf); \
+ const size_t read_bytes__SPRB = fread(buf__SPRB, 1, n__SPRB, fd__SPRB); \
+ if (read_bytes__SPRB != n__SPRB) { \
+ exit_code; \
+ return feof(fd__SPRB) ? SP_TRUNCERROR : SP_OTHERERROR; \
+ } \
+ } while (0)
+
+/// Like #SPELL_READ_BYTES, but also error out if a NUL byte was read
+///
+/// The bytes are read first; the n bytes stored in buf are then searched
+/// for NUL with memchr(). exit_code is expanded at both error exits (short
+/// read and NUL found), but at most one of them runs for a given call.
+///
+/// @param[out] buf Buffer to read to, must be at least n bytes long.
+/// @param[in] n Number of bytes to read.
+/// @param fd FILE* to read from.
+/// @param exit_code Statement to run just before returning on error.
+///
+/// @return Nothing when n bytes without NUL were read: execution continues
+///         after the macro. Otherwise makes the enclosing function return
+///         SP_TRUNCERROR if there are not enough bytes, SP_OTHERERROR if
+///         reading failed, or SP_FORMERROR if a NUL byte was read.
+#define SPELL_READ_NONNUL_BYTES(buf, n, fd, exit_code) \
+ do { \
+ const size_t n__SPRNB = (n); \
+ FILE *const fd__SPRNB = (fd); \
+ char *const buf__SPRNB = (buf); \
+ SPELL_READ_BYTES(buf__SPRNB, n__SPRNB, fd__SPRNB, exit_code); \
+ if (memchr(buf__SPRNB, NUL, (size_t)n__SPRNB)) { \
+ exit_code; \
+ return SP_FORMERROR; \
+ } \
+ } while (0)
+
+/// Verify the magic string at the start of a spell file
+///
+/// Only the magic string itself is read and compared; the file version is
+/// not checked here.
+///
+/// @param fd  Stream to read the magic string from.
+///
+/// @return 0 when the magic string matches, SP_FORMERROR when it differs,
+///         SP_TRUNCERROR when the file has too few bytes and SP_OTHERERROR
+///         when reading the file failed.
+static inline int spell_check_magic_string(FILE *const fd)
+  FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_ALWAYS_INLINE
+{
+  char magic[VIMSPELLMAGICL];
+  // On a short read this returns SP_TRUNCERROR/SP_OTHERERROR from here.
+  SPELL_READ_BYTES(magic, VIMSPELLMAGICL, fd, ;);
+  const bool matches = memcmp(magic, VIMSPELLMAGIC, VIMSPELLMAGICL) == 0;
+  return matches ? 0 : SP_FORMERROR;
+}
+
// Load one spell file and store the info into a slang_T.
//
// This is invoked in three ways:
@@ -514,9 +573,7 @@ spell_load_file (
)
{
FILE *fd;
- char_u buf[VIMSPELLMAGICL];
char_u *p;
- int i;
int n;
int len;
char_u *save_sourcing_name = sourcing_name;
@@ -558,11 +615,20 @@ spell_load_file (
sourcing_lnum = 0;
// <HEADER>: <fileID>
- for (i = 0; i < VIMSPELLMAGICL; ++i)
- buf[i] = getc(fd); // <fileID>
- if (STRNCMP(buf, VIMSPELLMAGIC, VIMSPELLMAGICL) != 0) {
- EMSG(_("E757: This does not look like a spell file"));
- goto endFAIL;
+  const int scms_ret = spell_check_magic_string(fd);
+  switch (scms_ret) {
+    case SP_FORMERROR:
+    case SP_TRUNCERROR: {
+      // Wrong or too-short magic string: not a spell file.
+      emsgf(_("E757: This does not look like a spell file"));
+      goto endFAIL;
+    }
+    case SP_OTHERERROR: {
+      // NOTE(review): ferror() yields an error flag, not an errno value, so
+      // the strerror() text may not describe the real failure -- confirm.
+      emsgf(_("E5042: Failed to read spell file %s: %s"),
+            fname, strerror(ferror(fd)));
+      // A read failure must abort loading as well. Without this jump
+      // control fell through to "case 0" and parsing continued on a stream
+      // that is in error state.
+      goto endFAIL;
+    }
+    case 0: {
+      break;
+    }
+  }
c = getc(fd); // <versionnr>
if (c < VIMSPELLVERSION) {
@@ -935,12 +1001,10 @@ static char_u *read_cnt_string(FILE *fd, int cnt_bytes, int *cntp)
// Return SP_*ERROR flags.
static int read_region_section(FILE *fd, slang_T *lp, int len)
{
- int i;
-
- if (len > 16)
+ if (len > 16) {
return SP_FORMERROR;
- for (i = 0; i < len; ++i)
- lp->sl_regions[i] = getc(fd); // <regionname>
+ }
+ SPELL_READ_NONNUL_BYTES((char *)lp->sl_regions, (size_t)len, fd, ;);
lp->sl_regions[len] = NUL;
return 0;
}
@@ -983,35 +1047,30 @@ static int read_charflags_section(FILE *fd)
// Return SP_*ERROR flags.
static int read_prefcond_section(FILE *fd, slang_T *lp)
{
- int cnt;
- int i;
- int n;
- char_u *p;
- char_u buf[MAXWLEN + 1];
-
// <prefcondcnt> <prefcond> ...
- cnt = get2c(fd); // <prefcondcnt>
- if (cnt <= 0)
+ const int cnt = get2c(fd); // <prefcondcnt>
+ if (cnt <= 0) {
return SP_FORMERROR;
+ }
lp->sl_prefprog = xcalloc(cnt, sizeof(regprog_T *));
lp->sl_prefixcnt = cnt;
- for (i = 0; i < cnt; ++i) {
+ for (int i = 0; i < cnt; i++) {
// <prefcond> : <condlen> <condstr>
- n = getc(fd); // <condlen>
- if (n < 0 || n >= MAXWLEN)
+ const int n = getc(fd); // <condlen>
+ if (n < 0 || n >= MAXWLEN) {
return SP_FORMERROR;
+ }
// When <condlen> is zero we have an empty condition. Otherwise
// compile the regexp program used to check for the condition.
if (n > 0) {
- buf[0] = '^'; // always match at one position only
- p = buf + 1;
- while (n-- > 0)
- *p++ = getc(fd); // <condstr>
- *p = NUL;
- lp->sl_prefprog[i] = vim_regcomp(buf, RE_MAGIC + RE_STRING);
+ char buf[MAXWLEN + 1];
+ buf[0] = '^'; // always match at one position only
+ SPELL_READ_NONNUL_BYTES(buf + 1, (size_t)n, fd, ;);
+ buf[n + 1] = NUL;
+ lp->sl_prefprog[i] = vim_regcomp((char_u *)buf, RE_MAGIC | RE_STRING);
}
}
return 0;
@@ -1064,7 +1123,6 @@ static int read_rep_section(FILE *fd, garray_T *gap, int16_t *first)
// Return SP_*ERROR flags.
static int read_sal_section(FILE *fd, slang_T *slang)
{
- int i;
int cnt;
garray_T *gap;
salitem_T *smp;
@@ -1074,13 +1132,16 @@ static int read_sal_section(FILE *fd, slang_T *slang)
slang->sl_sofo = false;
- i = getc(fd); // <salflags>
- if (i & SAL_F0LLOWUP)
+ const int flags = getc(fd); // <salflags>
+ if (flags & SAL_F0LLOWUP) {
slang->sl_followup = true;
- if (i & SAL_COLLAPSE)
+ }
+ if (flags & SAL_COLLAPSE) {
slang->sl_collapse = true;
- if (i & SAL_REM_ACCENTS)
+ }
+ if (flags & SAL_REM_ACCENTS) {
slang->sl_rem_accents = true;
+ }
cnt = get2c(fd); // <salcount>
if (cnt < 0)
@@ -1100,7 +1161,8 @@ static int read_sal_section(FILE *fd, slang_T *slang)
smp->sm_lead = p;
// Read up to the first special char into sm_lead.
- for (i = 0; i < ccnt; ++i) {
+ int i = 0;
+ for (; i < ccnt; ++i) {
c = getc(fd); // <salfrom>
if (vim_strchr((char_u *)"0123456789(-<^$", c) != NULL)
break;
@@ -1126,11 +1188,17 @@ static int read_sal_section(FILE *fd, slang_T *slang)
// Any following chars go in sm_rules.
smp->sm_rules = p;
- if (i < ccnt)
+ if (i < ccnt) {
// store the char we got while checking for end of sm_lead
*p++ = c;
- for (++i; i < ccnt; ++i)
- *p++ = getc(fd); // <salfrom>
+ }
+ i++;
+ if (i < ccnt) {
+ SPELL_READ_NONNUL_BYTES( // <salfrom>
+ (char *)p, (size_t)(ccnt - i), fd, xfree(smp->sm_lead));
+ p += (ccnt - i);
+ i = ccnt;
+ }
*p++ = NUL;
// <saltolen> <salto>
diff --git a/src/nvim/strings.c b/src/nvim/strings.c
index 5dcffe00e0..87e066d80a 100644
--- a/src/nvim/strings.c
+++ b/src/nvim/strings.c
@@ -291,14 +291,15 @@ void vim_strup(char_u *p)
}
}
-/// Make given string all upper-case
+/// Make given string all upper-case or all lower-case
///
-/// Handels multi-byte characters as good as possible.
+/// Handles multi-byte characters as well as possible.
///
/// @param[in] orig Input string.
+/// @param[in] upper If true make uppercase, otherwise lowercase
///
-/// @return [allocated] upper-cased string.
+/// @return [allocated] upper-cased or lower-cased string.
-char *strup_save(const char *const orig)
+char *strcase_save(const char *const orig, bool upper)
FUNC_ATTR_NONNULL_RET FUNC_ATTR_MALLOC FUNC_ATTR_NONNULL_ALL
{
char *res = xstrdup(orig);
@@ -307,33 +308,25 @@ char *strup_save(const char *const orig)
while (*p != NUL) {
int l;
- if (enc_utf8) {
- int c = utf_ptr2char((const char_u *)p);
- int uc = utf_toupper(c);
-
- // Reallocate string when byte count changes. This is rare,
- // thus it's OK to do another malloc()/free().
- l = utf_ptr2len((const char_u *)p);
- int newl = utf_char2len(uc);
- if (newl != l) {
- // TODO(philix): use xrealloc() in strup_save()
- char *s = xmalloc(STRLEN(res) + (size_t)(1 + newl - l));
- memcpy(s, res, (size_t)(p - res));
- STRCPY(s + (p - res) + newl, p + l);
- p = s + (p - res);
- xfree(res);
- res = s;
- }
-
- utf_char2bytes(uc, (char_u *)p);
- p += newl;
- } else if (has_mbyte && (l = (*mb_ptr2len)((const char_u *)p)) > 1) {
- p += l; // Skip multi-byte character.
- } else {
- // note that toupper() can be a macro
- *p = (char)(uint8_t)TOUPPER_LOC(*p);
- p++;
+ int c = utf_ptr2char((const char_u *)p);
+ int uc = upper ? mb_toupper(c) : mb_tolower(c);
+
+ // Reallocate string when byte count changes. This is rare,
+ // thus it's OK to do another malloc()/free().
+ l = utf_ptr2len((const char_u *)p);
+ int newl = utf_char2len(uc);
+ if (newl != l) {
+ // TODO(philix): use xrealloc() in strcase_save()
+ char *s = xmalloc(STRLEN(res) + (size_t)(1 + newl - l));
+ memcpy(s, res, (size_t)(p - res));
+ STRCPY(s + (p - res) + newl, p + l);
+ p = s + (p - res);
+ xfree(res);
+ res = s;
}
+
+ utf_char2bytes(uc, (char_u *)p);
+ p += newl;
}
return res;
diff --git a/src/nvim/syntax.c b/src/nvim/syntax.c
index e36b00d770..1ed65ec52a 100644
--- a/src/nvim/syntax.c
+++ b/src/nvim/syntax.c
@@ -4246,83 +4246,81 @@ static void syn_cmd_keyword(exarg_T *eap, int syncing)
if (rest != NULL) {
syn_id = syn_check_group(arg, (int)(group_name_end - arg));
- if (syn_id != 0)
- /* allocate a buffer, for removing backslashes in the keyword */
+ if (syn_id != 0) {
+ // Allocate a buffer, for removing backslashes in the keyword.
keyword_copy = xmalloc(STRLEN(rest) + 1);
- syn_opt_arg.flags = 0;
- syn_opt_arg.keyword = TRUE;
- syn_opt_arg.sync_idx = NULL;
- syn_opt_arg.has_cont_list = FALSE;
- syn_opt_arg.cont_in_list = NULL;
- syn_opt_arg.next_list = NULL;
-
- /*
- * The options given apply to ALL keywords, so all options must be
- * found before keywords can be created.
- * 1: collect the options and copy the keywords to keyword_copy.
- */
- cnt = 0;
- p = keyword_copy;
- for (; rest != NULL && !ends_excmd(*rest); rest = skipwhite(rest)) {
- rest = get_syn_options(rest, &syn_opt_arg, &conceal_char);
- if (rest == NULL || ends_excmd(*rest))
- break;
- /* Copy the keyword, removing backslashes, and add a NUL. */
- while (*rest != NUL && !ascii_iswhite(*rest)) {
- if (*rest == '\\' && rest[1] != NUL)
- ++rest;
- *p++ = *rest++;
- }
- *p++ = NUL;
- ++cnt;
}
+ if (keyword_copy != NULL) {
+ syn_opt_arg.flags = 0;
+ syn_opt_arg.keyword = true;
+ syn_opt_arg.sync_idx = NULL;
+ syn_opt_arg.has_cont_list = false;
+ syn_opt_arg.cont_in_list = NULL;
+ syn_opt_arg.next_list = NULL;
+
+ // The options given apply to ALL keywords, so all options must be
+ // found before keywords can be created.
+ // 1: collect the options and copy the keywords to keyword_copy.
+ cnt = 0;
+ p = keyword_copy;
+ for (; rest != NULL && !ends_excmd(*rest); rest = skipwhite(rest)) {
+ rest = get_syn_options(rest, &syn_opt_arg, &conceal_char);
+ if (rest == NULL || ends_excmd(*rest)) {
+ break;
+ }
+ // Copy the keyword, removing backslashes, and add a NUL.
+ while (*rest != NUL && !ascii_iswhite(*rest)) {
+ if (*rest == '\\' && rest[1] != NUL) {
+ rest++;
+ }
+ *p++ = *rest++;
+ }
+ *p++ = NUL;
+ cnt++;
+ }
- if (!eap->skip) {
- /* Adjust flags for use of ":syn include". */
- syn_incl_toplevel(syn_id, &syn_opt_arg.flags);
+ if (!eap->skip) {
+ // Adjust flags for use of ":syn include".
+ syn_incl_toplevel(syn_id, &syn_opt_arg.flags);
- /*
- * 2: Add an entry for each keyword.
- */
- for (kw = keyword_copy; --cnt >= 0; kw += STRLEN(kw) + 1) {
- for (p = vim_strchr(kw, '[');; ) {
- if (p != NULL)
- *p = NUL;
- add_keyword(kw, syn_id, syn_opt_arg.flags,
- syn_opt_arg.cont_in_list,
- syn_opt_arg.next_list, conceal_char);
- if (p == NULL)
- break;
- if (p[1] == NUL) {
- EMSG2(_("E789: Missing ']': %s"), kw);
- goto error;
- }
- if (p[1] == ']') {
- if (p[2] != NUL) {
- EMSG3(_("E890: trailing char after ']': %s]%s"),
- kw, &p[2]);
+ // 2: Add an entry for each keyword.
+ for (kw = keyword_copy; --cnt >= 0; kw += STRLEN(kw) + 1) {
+ for (p = vim_strchr(kw, '[');; ) {
+ if (p != NULL) {
+ *p = NUL;
+ }
+ add_keyword(kw, syn_id, syn_opt_arg.flags,
+ syn_opt_arg.cont_in_list,
+ syn_opt_arg.next_list, conceal_char);
+ if (p == NULL) {
+ break;
+ }
+ if (p[1] == NUL) {
+ emsgf(_("E789: Missing ']': %s"), kw);
goto error;
}
- kw = p + 1;
- break; // skip over the "]"
- }
- if (has_mbyte) {
- int l = (*mb_ptr2len)(p + 1);
+ if (p[1] == ']') {
+ if (p[2] != NUL) {
+ emsgf(_("E890: trailing char after ']': %s]%s"),
+ kw, &p[2]);
+ goto error;
+ }
+ kw = p + 1;
+ break; // skip over the "]"
+ }
+ const int l = (*mb_ptr2len)(p + 1);
memmove(p, p + 1, l);
p += l;
- } else {
- p[0] = p[1];
- ++p;
}
}
}
- }
error:
- xfree(keyword_copy);
- xfree(syn_opt_arg.cont_in_list);
- xfree(syn_opt_arg.next_list);
+ xfree(keyword_copy);
+ xfree(syn_opt_arg.cont_in_list);
+ xfree(syn_opt_arg.next_list);
+ }
}
if (rest != NULL)
diff --git a/src/nvim/testdir/test_functions.vim b/src/nvim/testdir/test_functions.vim
index 81cb6314ce..3c258299c1 100644
--- a/src/nvim/testdir/test_functions.vim
+++ b/src/nvim/testdir/test_functions.vim
@@ -29,3 +29,147 @@ func Test_setbufvar_options()
bwipe!
endfunc
+func Test_tolower()
+ call assert_equal("", tolower(""))
+
+ " Test with all printable ASCII characters.
+ call assert_equal(' !"#$%&''()*+,-./0123456789:;<=>?@abcdefghijklmnopqrstuvwxyz[\]^_`abcdefghijklmnopqrstuvwxyz{|}~',
+ \ tolower(' !"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'))
+
+ if !has('multi_byte')
+ return
+ endif
+
+ " Test with a few uppercase diacritics.
+ call assert_equal("aàáâãäåāăąǎǟǡả", tolower("AÀÁÂÃÄÅĀĂĄǍǞǠẢ"))
+ call assert_equal("bḃḇ", tolower("BḂḆ"))
+ call assert_equal("cçćĉċč", tolower("CÇĆĈĊČ"))
+ call assert_equal("dďđḋḏḑ", tolower("DĎĐḊḎḐ"))
+ call assert_equal("eèéêëēĕėęěẻẽ", tolower("EÈÉÊËĒĔĖĘĚẺẼ"))
+ call assert_equal("fḟ ", tolower("FḞ "))
+ call assert_equal("gĝğġģǥǧǵḡ", tolower("GĜĞĠĢǤǦǴḠ"))
+ call assert_equal("hĥħḣḧḩ", tolower("HĤĦḢḦḨ"))
+ call assert_equal("iìíîïĩīĭįiǐỉ", tolower("IÌÍÎÏĨĪĬĮİǏỈ"))
+ call assert_equal("jĵ", tolower("JĴ"))
+ call assert_equal("kķǩḱḵ", tolower("KĶǨḰḴ"))
+ call assert_equal("lĺļľŀłḻ", tolower("LĹĻĽĿŁḺ"))
+ call assert_equal("mḿṁ", tolower("MḾṀ"))
+ call assert_equal("nñńņňṅṉ", tolower("NÑŃŅŇṄṈ"))
+ call assert_equal("oòóôõöøōŏőơǒǫǭỏ", tolower("OÒÓÔÕÖØŌŎŐƠǑǪǬỎ"))
+ call assert_equal("pṕṗ", tolower("PṔṖ"))
+ call assert_equal("q", tolower("Q"))
+ call assert_equal("rŕŗřṙṟ", tolower("RŔŖŘṘṞ"))
+ call assert_equal("sśŝşšṡ", tolower("SŚŜŞŠṠ"))
+ call assert_equal("tţťŧṫṯ", tolower("TŢŤŦṪṮ"))
+ call assert_equal("uùúûüũūŭůűųưǔủ", tolower("UÙÚÛÜŨŪŬŮŰŲƯǓỦ"))
+ call assert_equal("vṽ", tolower("VṼ"))
+ call assert_equal("wŵẁẃẅẇ", tolower("WŴẀẂẄẆ"))
+ call assert_equal("xẋẍ", tolower("XẊẌ"))
+ call assert_equal("yýŷÿẏỳỷỹ", tolower("YÝŶŸẎỲỶỸ"))
+ call assert_equal("zźżžƶẑẕ", tolower("ZŹŻŽƵẐẔ"))
+
+ " Test with a few lowercase diacritics, which should remain unchanged.
+ call assert_equal("aàáâãäåāăąǎǟǡả", tolower("aàáâãäåāăąǎǟǡả"))
+ call assert_equal("bḃḇ", tolower("bḃḇ"))
+ call assert_equal("cçćĉċč", tolower("cçćĉċč"))
+ call assert_equal("dďđḋḏḑ", tolower("dďđḋḏḑ"))
+ call assert_equal("eèéêëēĕėęěẻẽ", tolower("eèéêëēĕėęěẻẽ"))
+ call assert_equal("fḟ", tolower("fḟ"))
+ call assert_equal("gĝğġģǥǧǵḡ", tolower("gĝğġģǥǧǵḡ"))
+ call assert_equal("hĥħḣḧḩẖ", tolower("hĥħḣḧḩẖ"))
+ call assert_equal("iìíîïĩīĭįǐỉ", tolower("iìíîïĩīĭįǐỉ"))
+ call assert_equal("jĵǰ", tolower("jĵǰ"))
+ call assert_equal("kķǩḱḵ", tolower("kķǩḱḵ"))
+ call assert_equal("lĺļľŀłḻ", tolower("lĺļľŀłḻ"))
+ call assert_equal("mḿṁ ", tolower("mḿṁ "))
+ call assert_equal("nñńņňʼnṅṉ", tolower("nñńņňʼnṅṉ"))
+ call assert_equal("oòóôõöøōŏőơǒǫǭỏ", tolower("oòóôõöøōŏőơǒǫǭỏ"))
+ call assert_equal("pṕṗ", tolower("pṕṗ"))
+ call assert_equal("q", tolower("q"))
+ call assert_equal("rŕŗřṙṟ", tolower("rŕŗřṙṟ"))
+ call assert_equal("sśŝşšṡ", tolower("sśŝşšṡ"))
+ call assert_equal("tţťŧṫṯẗ", tolower("tţťŧṫṯẗ"))
+ call assert_equal("uùúûüũūŭůűųưǔủ", tolower("uùúûüũūŭůűųưǔủ"))
+ call assert_equal("vṽ", tolower("vṽ"))
+ call assert_equal("wŵẁẃẅẇẘ", tolower("wŵẁẃẅẇẘ"))
+ call assert_equal("ẋẍ", tolower("ẋẍ"))
+ call assert_equal("yýÿŷẏẙỳỷỹ", tolower("yýÿŷẏẙỳỷỹ"))
+ call assert_equal("zźżžƶẑẕ", tolower("zźżžƶẑẕ"))
+
+ " According to https://twitter.com/jifa/status/625776454479970304
+ " Ⱥ (U+023A) and Ⱦ (U+023E) are the *only* code points to increase
+ " in length (2 to 3 bytes) when lowercased. So let's test them.
+ call assert_equal("ⱥ ⱦ", tolower("Ⱥ Ⱦ"))
+endfunc
+
+func Test_toupper()
+ " Check toupper() on the empty string, printable ASCII, lowercase and
+ " uppercase letters with diacritics, and characters whose byte length
+ " changes when uppercased.
+ call assert_equal("", toupper(""))
+
+ " Test with all printable ASCII characters.
+ call assert_equal(' !"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~',
+ \ toupper(' !"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'))
+
+ if !has('multi_byte')
+ return
+ endif
+
+ " Test with a few lowercase diacritics.
+ call assert_equal("AÀÁÂÃÄÅĀĂĄǍǞǠẢ", toupper("aàáâãäåāăąǎǟǡả"))
+ call assert_equal("BḂḆ", toupper("bḃḇ"))
+ call assert_equal("CÇĆĈĊČ", toupper("cçćĉċč"))
+ call assert_equal("DĎĐḊḎḐ", toupper("dďđḋḏḑ"))
+ call assert_equal("EÈÉÊËĒĔĖĘĚẺẼ", toupper("eèéêëēĕėęěẻẽ"))
+ call assert_equal("FḞ", toupper("fḟ"))
+ call assert_equal("GĜĞĠĢǤǦǴḠ", toupper("gĝğġģǥǧǵḡ"))
+ call assert_equal("HĤĦḢḦḨẖ", toupper("hĥħḣḧḩẖ"))
+ call assert_equal("IÌÍÎÏĨĪĬĮǏỈ", toupper("iìíîïĩīĭįǐỉ"))
+ call assert_equal("JĴǰ", toupper("jĵǰ"))
+ call assert_equal("KĶǨḰḴ", toupper("kķǩḱḵ"))
+ call assert_equal("LĹĻĽĿŁḺ", toupper("lĺļľŀłḻ"))
+ call assert_equal("MḾṀ ", toupper("mḿṁ "))
+ call assert_equal("NÑŃŅŇʼnṄṈ", toupper("nñńņňʼnṅṉ"))
+ call assert_equal("OÒÓÔÕÖØŌŎŐƠǑǪǬỎ", toupper("oòóôõöøōŏőơǒǫǭỏ"))
+ call assert_equal("PṔṖ", toupper("pṕṗ"))
+ call assert_equal("Q", toupper("q"))
+ call assert_equal("RŔŖŘṘṞ", toupper("rŕŗřṙṟ"))
+ call assert_equal("SŚŜŞŠṠ", toupper("sśŝşšṡ"))
+ call assert_equal("TŢŤŦṪṮẗ", toupper("tţťŧṫṯẗ"))
+ call assert_equal("UÙÚÛÜŨŪŬŮŰŲƯǓỦ", toupper("uùúûüũūŭůűųưǔủ"))
+ call assert_equal("VṼ", toupper("vṽ"))
+ call assert_equal("WŴẀẂẄẆẘ", toupper("wŵẁẃẅẇẘ"))
+ call assert_equal("ẊẌ", toupper("ẋẍ"))
+ call assert_equal("YÝŸŶẎẙỲỶỸ", toupper("yýÿŷẏẙỳỷỹ"))
+ call assert_equal("ZŹŻŽƵẐẔ", toupper("zźżžƶẑẕ"))
+
+ " Test with a few uppercase diacritics, which should remain unchanged.
+ call assert_equal("AÀÁÂÃÄÅĀĂĄǍǞǠẢ", toupper("AÀÁÂÃÄÅĀĂĄǍǞǠẢ"))
+ call assert_equal("BḂḆ", toupper("BḂḆ"))
+ call assert_equal("CÇĆĈĊČ", toupper("CÇĆĈĊČ"))
+ call assert_equal("DĎĐḊḎḐ", toupper("DĎĐḊḎḐ"))
+ call assert_equal("EÈÉÊËĒĔĖĘĚẺẼ", toupper("EÈÉÊËĒĔĖĘĚẺẼ"))
+ call assert_equal("FḞ ", toupper("FḞ "))
+ call assert_equal("GĜĞĠĢǤǦǴḠ", toupper("GĜĞĠĢǤǦǴḠ"))
+ call assert_equal("HĤĦḢḦḨ", toupper("HĤĦḢḦḨ"))
+ call assert_equal("IÌÍÎÏĨĪĬĮİǏỈ", toupper("IÌÍÎÏĨĪĬĮİǏỈ"))
+ call assert_equal("JĴ", toupper("JĴ"))
+ call assert_equal("KĶǨḰḴ", toupper("KĶǨḰḴ"))
+ call assert_equal("LĹĻĽĿŁḺ", toupper("LĹĻĽĿŁḺ"))
+ call assert_equal("MḾṀ", toupper("MḾṀ"))
+ call assert_equal("NÑŃŅŇṄṈ", toupper("NÑŃŅŇṄṈ"))
+ call assert_equal("OÒÓÔÕÖØŌŎŐƠǑǪǬỎ", toupper("OÒÓÔÕÖØŌŎŐƠǑǪǬỎ"))
+ call assert_equal("PṔṖ", toupper("PṔṖ"))
+ call assert_equal("Q", toupper("Q"))
+ call assert_equal("RŔŖŘṘṞ", toupper("RŔŖŘṘṞ"))
+ call assert_equal("SŚŜŞŠṠ", toupper("SŚŜŞŠṠ"))
+ call assert_equal("TŢŤŦṪṮ", toupper("TŢŤŦṪṮ"))
+ call assert_equal("UÙÚÛÜŨŪŬŮŰŲƯǓỦ", toupper("UÙÚÛÜŨŪŬŮŰŲƯǓỦ"))
+ call assert_equal("VṼ", toupper("VṼ"))
+ call assert_equal("WŴẀẂẄẆ", toupper("WŴẀẂẄẆ"))
+ call assert_equal("XẊẌ", toupper("XẊẌ"))
+ call assert_equal("YÝŶŸẎỲỶỸ", toupper("YÝŶŸẎỲỶỸ"))
+ call assert_equal("ZŹŻŽƵẐẔ", toupper("ZŹŻŽƵẐẔ"))
+
+ " ⱥ (U+2C65) and ⱦ (U+2C66) shrink from 3 to 2 bytes when uppercased,
+ " which exercises the path where the byte count of the string changes.
+ call assert_equal("Ⱥ Ⱦ", toupper("ⱥ ⱦ"))
+endfunc
+
+
diff --git a/src/nvim/testdir/test_normal.vim b/src/nvim/testdir/test_normal.vim
index a22dca35cc..c529971528 100644
--- a/src/nvim/testdir/test_normal.vim
+++ b/src/nvim/testdir/test_normal.vim
@@ -1606,6 +1606,40 @@ fun! Test_normal30_changecase()
norm! V~
call assert_equal('THIS IS A simple test: äüöss', getline('.'))
+ " Turkish ASCII turns to multi-byte. On Mac the Turkish locale is available
+ " but toupper()/tolower() don't do the right thing.
+ if !has('mac') && !has('osx')
+ try
+ lang tr_TR.UTF-8
+ set casemap=
+ call setline(1, 'iI')
+ 1normal gUU
+ call assert_equal("\u0130I", getline(1))
+ call assert_equal("\u0130I", toupper("iI"))
+
+ call setline(1, 'iI')
+ 1normal guu
+ call assert_equal("i\u0131", getline(1))
+ call assert_equal("i\u0131", tolower("iI"))
+
+ set casemap&
+ call setline(1, 'iI')
+ 1normal gUU
+ call assert_equal("II", getline(1))
+ call assert_equal("II", toupper("iI"))
+
+ call setline(1, 'iI')
+ 1normal guu
+ call assert_equal("ii", getline(1))
+ call assert_equal("ii", tolower("iI"))
+
+ lang en_US.UTF-8
+ catch /E197:/
+ " can't use Turkish locale
+ throw 'Skipped: Turkish locale not available'
+ endtry
+ endif
+
" clean up
bw!
endfunc
diff --git a/test/functional/ex_cmds/syntax_spec.lua b/test/functional/ex_cmds/syntax_spec.lua
new file mode 100644
index 0000000000..c9e96703de
--- /dev/null
+++ b/test/functional/ex_cmds/syntax_spec.lua
@@ -0,0 +1,17 @@
+local helpers = require('test.functional.helpers')(after_each)
+
+local eq = helpers.eq
+local clear = helpers.clear
+local exc_exec = helpers.exc_exec
+
+describe(':syntax', function()
+ before_each(clear)
+
+ describe('keyword', function()
+ it('does not crash when group name contains unprintable characters',
+ function()
+ eq('Vim(syntax):E669: Unprintable character in group name',
+ exc_exec('syntax keyword \024 foo bar'))
+ end)
+ end)
+end)
diff --git a/test/functional/normal/lang_spec.lua b/test/functional/normal/lang_spec.lua
new file mode 100644
index 0000000000..464b85d684
--- /dev/null
+++ b/test/functional/normal/lang_spec.lua
@@ -0,0 +1,59 @@
+local helpers = require('test.functional.helpers')(after_each)
+local clear, insert, eq = helpers.clear, helpers.insert, helpers.eq
+local execute, expect = helpers.execute, helpers.expect
+local feed, eval = helpers.feed, helpers.eval
+local exc_exec = helpers.exc_exec
+
+describe('gu and gU', function()
+ before_each(clear)
+
+ it('works in any locale with default casemap', function()
+ eq('internal,keepascii', eval('&casemap'))
+ insert("iI")
+ feed("VgU")
+ expect("II")
+ feed("Vgu")
+ expect("ii")
+ end)
+
+ describe('works in Turkish locale', function()
+ if helpers.pending_win32(pending) then return end
+
+ clear()
+ if eval('has("mac")') ~= 0 then
+ pending("not yet on macOS", function() end)
+ return
+ end
+
+ local err = exc_exec('lang ctype tr_TR.UTF-8')
+ if err ~= 0 then
+ pending("Locale tr_TR.UTF-8 not supported", function() end)
+ return
+ end
+
+ before_each(function()
+ execute('lang ctype tr_TR.UTF-8')
+ end)
+
+ it('with default casemap', function()
+ eq('internal,keepascii', eval('&casemap'))
+ -- expect ASCII behavior
+ insert("iI")
+ feed("VgU")
+ expect("II")
+ feed("Vgu")
+ expect("ii")
+ end)
+
+ it('with casemap=""', function()
+ execute('set casemap=')
+ -- expect Turkish locale behavior
+ insert("iI")
+ feed("VgU")
+ expect("İI")
+ feed("Vgu")
+ expect("iı")
+ end)
+
+ end)
+end)
diff --git a/test/functional/spell/spellfile_spec.lua b/test/functional/spell/spellfile_spec.lua
new file mode 100644
index 0000000000..e7cd10d2ac
--- /dev/null
+++ b/test/functional/spell/spellfile_spec.lua
@@ -0,0 +1,108 @@
+local helpers = require('test.functional.helpers')(after_each)
+local lfs = require('lfs')
+
+local eq = helpers.eq
+local clear = helpers.clear
+local meths = helpers.meths
+local exc_exec = helpers.exc_exec
+local write_file = helpers.write_file
+
+local testdir = 'Xtest-functional-spell-spellfile.d'
+
+describe('spellfile', function()
+ before_each(function()
+ clear()
+ lfs.mkdir(testdir)
+ lfs.mkdir(testdir .. '/spell')
+ end)
+ after_each(function()
+   -- lfs.rmdir() only removes empty directories, so first delete the spell
+   -- file written by the tests and the nested spell/ directory; otherwise
+   -- the fixture directory is left behind after every test.
+   os.remove(testdir .. '/spell/en.ascii.spl')
+   lfs.rmdir(testdir .. '/spell')
+   lfs.rmdir(testdir)
+ end)
+ -- ┌ Magic string (#VIMSPELLMAGIC)
+ -- │ ┌ Spell file version (#VIMSPELLVERSION)
+ local spellheader = 'VIMspell\050'
+ it('errors out when prefcond section is truncated', function()
+ meths.set_option('runtimepath', testdir)
+ write_file(testdir .. '/spell/en.ascii.spl',
+ -- ┌ Section identifier (#SN_PREFCOND)
+ -- │ ┌ Section flags (#SNF_REQUIRED or zero)
+ -- │ │ ┌ Section length (4 bytes, MSB first)
+ spellheader .. '\003\001\000\000\000\003'
+ -- ┌ Number of regexes in section (2 bytes, MSB first)
+ -- │ ┌ Condition length (1 byte)
+ -- │ │ ┌ Condition regex (missing!)
+ .. '\000\001\001')
+ meths.set_option('spelllang', 'en')
+ eq('Vim(set):E758: Truncated spell file',
+ exc_exec('set spell'))
+ end)
+ it('errors out when prefcond regexp contains NUL byte', function()
+ meths.set_option('runtimepath', testdir)
+ write_file(testdir .. '/spell/en.ascii.spl',
+ -- ┌ Section identifier (#SN_PREFCOND)
+ -- │ ┌ Section flags (#SNF_REQUIRED or zero)
+ -- │ │ ┌ Section length (4 bytes, MSB first)
+ spellheader .. '\003\001\000\000\000\008'
+ -- ┌ Number of regexes in section (2 bytes, MSB first)
+ -- │ ┌ Condition length (1 byte)
+ -- │ │ ┌ Condition regex
+ -- │ │ │ ┌ End of sections marker
+ .. '\000\001\005ab\000cd\255'
+ -- ┌ LWORDTREE tree length (4 bytes)
+ -- │ ┌ KWORDTREE tree length (4 bytes)
+ -- │ │ ┌ PREFIXTREE tree length
+ .. '\000\000\000\000\000\000\000\000\000\000\000\000')
+ meths.set_option('spelllang', 'en')
+ eq('Vim(set):E759: Format error in spell file',
+ exc_exec('set spell'))
+ end)
+ it('errors out when region contains NUL byte', function()
+ meths.set_option('runtimepath', testdir)
+ write_file(testdir .. '/spell/en.ascii.spl',
+ -- ┌ Section identifier (#SN_REGION)
+ -- │ ┌ Section flags (#SNF_REQUIRED or zero)
+ -- │ │ ┌ Section length (4 bytes, MSB first)
+ spellheader .. '\000\001\000\000\000\008'
+ -- ┌ Regions ┌ End of sections marker
+ .. '01234\00067\255'
+ -- ┌ LWORDTREE tree length (4 bytes)
+ -- │ ┌ KWORDTREE tree length (4 bytes)
+ -- │ │ ┌ PREFIXTREE tree length
+ .. '\000\000\000\000\000\000\000\000\000\000\000\000')
+ meths.set_option('spelllang', 'en')
+ eq('Vim(set):E759: Format error in spell file',
+ exc_exec('set spell'))
+ end)
+ it('errors out when SAL section contains NUL byte', function()
+ meths.set_option('runtimepath', testdir)
+ write_file(testdir .. '/spell/en.ascii.spl',
+ -- ┌ Section identifier (#SN_SAL)
+ -- │ ┌ Section flags (#SNF_REQUIRED or zero)
+ -- │ │ ┌ Section length (4 bytes, MSB first)
+ spellheader .. '\005\001\000\000\000\008'
+ -- ┌ salflags
+ -- │ ┌ salcount (2 bytes, MSB first)
+ -- │ │ ┌ salfromlen (1 byte)
+ -- │ │ │ ┌ Special character
+ -- │ │ │ │┌ salfrom (should not contain NUL)
+ -- │ │ │ ││ ┌ saltolen
+ -- │ │ │ ││ │ ┌ salto
+ -- │ │ │ ││ │ │┌ End of sections marker
+ .. '\000\000\001\0024\000\0017\255'
+ -- ┌ LWORDTREE tree length (4 bytes)
+ -- │ ┌ KWORDTREE tree length (4 bytes)
+ -- │ │ ┌ PREFIXTREE tree length
+ .. '\000\000\000\000\000\000\000\000\000\000\000\000')
+ meths.set_option('spelllang', 'en')
+ eq('Vim(set):E759: Format error in spell file',
+ exc_exec('set spell'))
+ end)
+ it('errors out when spell header contains NUL bytes', function()
+ meths.set_option('runtimepath', testdir)
+ write_file(testdir .. '/spell/en.ascii.spl',
+ spellheader:sub(1, -3) .. '\000\000')
+ meths.set_option('spelllang', 'en')
+ eq('Vim(set):E757: This does not look like a spell file',
+ exc_exec('set spell'))
+ end)
+end)