aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/nvim/buffer.c73
-rw-r--r--src/nvim/cursor.c8
-rw-r--r--src/nvim/diff.c9
-rw-r--r--src/nvim/edit.c156
-rw-r--r--src/nvim/eval/funcs.c113
-rw-r--r--src/nvim/ex_cmds.c32
-rw-r--r--src/nvim/ex_docmd.c26
-rw-r--r--src/nvim/ex_getln.c78
-rw-r--r--src/nvim/fileio.c63
-rw-r--r--src/nvim/fold.c10
-rw-r--r--src/nvim/getchar.c181
-rw-r--r--src/nvim/memline.c4
-rw-r--r--src/nvim/message.c59
-rw-r--r--src/nvim/move.c8
-rw-r--r--src/nvim/option.c56
-rw-r--r--src/nvim/path.c23
-rw-r--r--src/nvim/regexp.c57
-rw-r--r--src/nvim/regexp_nfa.c29
-rw-r--r--src/nvim/search.c60
-rw-r--r--src/nvim/spell.c575
-rw-r--r--src/nvim/spellfile.c210
-rw-r--r--src/nvim/strings.c10
-rw-r--r--src/nvim/syntax.c6
-rw-r--r--src/nvim/testdir/test_digraph.vim3
-rw-r--r--src/nvim/testdir/test_plus_arg_edit.vim4
-rw-r--r--src/nvim/testdir/test_search.vim2
-rw-r--r--src/nvim/window.c6
27 files changed, 694 insertions, 1167 deletions
diff --git a/src/nvim/buffer.c b/src/nvim/buffer.c
index 0ebe33f2f8..99cdde300d 100644
--- a/src/nvim/buffer.c
+++ b/src/nvim/buffer.c
@@ -3334,9 +3334,7 @@ void maketitle(void)
len = (int)STRLEN(buf_p);
if (len > 100) {
len -= 100;
- if (has_mbyte) {
- len += (*mb_tail_off)(buf_p, buf_p + len) + 1;
- }
+ len += (*mb_tail_off)(buf_p, buf_p + len) + 1;
buf_p += len;
}
STRCPY(icon_str, buf_p);
@@ -3661,17 +3659,12 @@ int build_stl_str_hl(
// truncate by removing bytes from the start of the group text.
if (group_len > stl_items[stl_groupitems[groupdepth]].maxwid) {
// { Determine the number of bytes to remove
- long n;
- if (has_mbyte) {
- // Find the first character that should be included.
- n = 0;
- while (group_len >= stl_items[stl_groupitems[groupdepth]].maxwid) {
- group_len -= ptr2cells(t + n);
- n += (*mb_ptr2len)(t + n);
- }
- } else {
- n = (long)(out_p - t)
- - stl_items[stl_groupitems[groupdepth]].maxwid + 1;
+
+ // Find the first character that should be included.
+ long n = 0;
+ while (group_len >= stl_items[stl_groupitems[groupdepth]].maxwid) {
+ group_len -= ptr2cells(t + n);
+ n += (*mb_ptr2len)(t + n);
}
// }
@@ -4183,13 +4176,10 @@ int build_stl_str_hl(
// If the item is too wide, truncate it from the beginning
if (l > maxwid) {
- while (l >= maxwid)
- if (has_mbyte) {
- l -= ptr2cells(t);
- t += (*mb_ptr2len)(t);
- } else {
- l -= byte2cells(*t++);
- }
+ while (l >= maxwid) {
+ l -= ptr2cells(t);
+ t += utfc_ptr2len(t);
+ }
// Early out if there isn't enough room for the truncation marker
if (out_p >= out_end_p) {
@@ -4372,26 +4362,19 @@ int build_stl_str_hl(
// If the truncation point we found is beyond the maximum
// length of the string, truncate the end of the string.
if (width - vim_strsize(trunc_p) >= maxwidth) {
- // If we are using a multi-byte encoding, walk from the beginning of the
+ // Walk from the beginning of the
// string to find the last character that will fit.
- if (has_mbyte) {
- trunc_p = out;
- width = 0;
- for (;; ) {
- width += ptr2cells(trunc_p);
- if (width >= maxwidth) {
- break;
- }
-
- // Note: Only advance the pointer if the next
- // character will fit in the available output space
- trunc_p += (*mb_ptr2len)(trunc_p);
+ trunc_p = out;
+ width = 0;
+ for (;; ) {
+ width += ptr2cells(trunc_p);
+ if (width >= maxwidth) {
+ break;
}
- // Otherwise put the truncation point at the end, leaving enough room
- // for a single-character truncation marker
- } else {
- trunc_p = out + maxwidth - 1;
+ // Note: Only advance the pointer if the next
+ // character will fit in the available output space
+ trunc_p += utfc_ptr2len(trunc_p);
}
// Ignore any items in the statusline that occur after
@@ -4410,16 +4393,10 @@ int build_stl_str_hl(
// Truncate at the truncation point we found
} else {
// { Determine how many bytes to remove
- long trunc_len;
- if (has_mbyte) {
- trunc_len = 0;
- while (width >= maxwidth) {
- width -= ptr2cells(trunc_p + trunc_len);
- trunc_len += (*mb_ptr2len)(trunc_p + trunc_len);
- }
- } else {
- // Truncate an extra character so we can insert our `<`.
- trunc_len = (width - maxwidth) + 1;
+ long trunc_len = 0;
+ while (width >= maxwidth) {
+ width -= ptr2cells(trunc_p + trunc_len);
+ trunc_len += utfc_ptr2len(trunc_p + trunc_len);
}
// }
diff --git a/src/nvim/cursor.c b/src/nvim/cursor.c
index d3ffab1759..74a6f77a6d 100644
--- a/src/nvim/cursor.c
+++ b/src/nvim/cursor.c
@@ -242,9 +242,7 @@ static int coladvance2(
}
// Prevent from moving onto a trail byte.
- if (has_mbyte) {
- mark_mb_adjustpos(curbuf, pos);
- }
+ mark_mb_adjustpos(curbuf, pos);
if (wcol < 0 || col < wcol) {
return FAIL;
@@ -378,9 +376,7 @@ void check_cursor_col_win(win_T *win)
} else {
win->w_cursor.col = len - 1;
// Move the cursor to the head byte.
- if (has_mbyte) {
- mark_mb_adjustpos(win->w_buffer, &win->w_cursor);
- }
+ mark_mb_adjustpos(win->w_buffer, &win->w_cursor);
}
} else if (win->w_cursor.col < 0) {
win->w_cursor.col = 0;
diff --git a/src/nvim/diff.c b/src/nvim/diff.c
index b9c293f6c8..1cdf84f9d0 100644
--- a/src/nvim/diff.c
+++ b/src/nvim/diff.c
@@ -719,15 +719,12 @@ static int diff_write_buffer(buf_T *buf, diffin_T *din)
for (lnum = 1; lnum <= buf->b_ml.ml_line_count; lnum++) {
for (s = ml_get_buf(buf, lnum, false); *s != NUL; ) {
if (diff_flags & DIFF_ICASE) {
- int c;
-
- // xdiff doesn't support ignoring case, fold-case the text.
- int orig_len;
char_u cbuf[MB_MAXBYTES + 1];
- c = PTR2CHAR(s);
+ // xdiff doesn't support ignoring case, fold-case the text.
+ int c = PTR2CHAR(s);
c = utf_fold(c);
- orig_len = utfc_ptr2len(s);
+ const int orig_len = utfc_ptr2len(s);
if (utf_char2bytes(c, cbuf) != orig_len) {
// TODO(Bram): handle byte length difference
memmove(ptr + len, s, orig_len);
diff --git a/src/nvim/edit.c b/src/nvim/edit.c
index 9c8d64a6b2..5d44c3274e 100644
--- a/src/nvim/edit.c
+++ b/src/nvim/edit.c
@@ -426,9 +426,9 @@ static void insert_enter(InsertState *s)
|| curwin->w_curswant > curwin->w_virtcol)
&& *(s->ptr = get_cursor_line_ptr() + curwin->w_cursor.col) != NUL) {
if (s->ptr[1] == NUL) {
- ++curwin->w_cursor.col;
- } else if (has_mbyte) {
- s->i = (*mb_ptr2len)(s->ptr);
+ curwin->w_cursor.col++;
+ } else {
+ s->i = utfc_ptr2len(s->ptr);
if (s->ptr[s->i] == NUL) {
curwin->w_cursor.col += s->i;
}
@@ -1299,11 +1299,10 @@ normalchar:
// special character. Let CTRL-] expand abbreviations without
// inserting it.
if (vim_iswordc(s->c)
- || (!echeck_abbr(
// Add ABBR_OFF for characters above 0x100, this is
// what check_abbr() expects.
- (has_mbyte && s->c >= 0x100) ? (s->c + ABBR_OFF) : s->c)
- && s->c != Ctrl_RSB)) {
+ || (!echeck_abbr((s->c >= 0x100) ? (s->c + ABBR_OFF) : s->c)
+ && s->c != Ctrl_RSB)) {
insert_special(s->c, false, false);
revins_legal++;
revins_chars++;
@@ -1574,14 +1573,12 @@ void edit_putchar(int c, bool highlight)
pc_status = PC_STATUS_UNSET;
if (curwin->w_p_rl) {
pc_col += curwin->w_grid.Columns - 1 - curwin->w_wcol;
- if (has_mbyte) {
- int fix_col = grid_fix_col(&curwin->w_grid, pc_col, pc_row);
+ const int fix_col = grid_fix_col(&curwin->w_grid, pc_col, pc_row);
- if (fix_col != pc_col) {
- grid_putchar(&curwin->w_grid, ' ', pc_row, fix_col, attr);
- curwin->w_wcol--;
- pc_status = PC_STATUS_RIGHT;
- }
+ if (fix_col != pc_col) {
+ grid_putchar(&curwin->w_grid, ' ', pc_row, fix_col, attr);
+ curwin->w_wcol--;
+ pc_status = PC_STATUS_RIGHT;
}
} else {
pc_col += curwin->w_wcol;
@@ -1817,10 +1814,11 @@ change_indent (
ptr = get_cursor_line_ptr();
while (vcol <= (int)curwin->w_virtcol) {
last_vcol = vcol;
- if (has_mbyte && new_cursor_col >= 0)
- new_cursor_col += (*mb_ptr2len)(ptr + new_cursor_col);
- else
- ++new_cursor_col;
+ if (new_cursor_col >= 0) {
+ new_cursor_col += utfc_ptr2len(ptr + new_cursor_col);
+ } else {
+ new_cursor_col++;
+ }
vcol += lbr_chartabsize(ptr, ptr + new_cursor_col, (colnr_T)vcol);
}
vcol = last_vcol;
@@ -1975,7 +1973,7 @@ void backspace_until_column(int col)
/// @return true when something was deleted.
static bool del_char_after_col(int limit_col)
{
- if (enc_utf8 && limit_col >= 0) {
+ if (limit_col >= 0) {
colnr_T ecol = curwin->w_cursor.col + 1;
// Make sure the cursor is at the start of a character, but
@@ -2174,15 +2172,14 @@ int ins_compl_add_infercase(char_u *str_arg, int len, bool icase, char_u *fname,
// Infer case of completed part.
// Find actual length of completion.
- if (has_mbyte) {
+ {
const char_u *p = str;
actual_len = 0;
while (*p != NUL) {
MB_PTR_ADV(p);
actual_len++;
}
- } else
- actual_len = len;
+ }
// Find actual length of original text.
{
@@ -2204,11 +2201,7 @@ int ins_compl_add_infercase(char_u *str_arg, int len, bool icase, char_u *fname,
{
const char_u *p = str;
for (i = 0; i < actual_len; i++) {
- if (has_mbyte) {
- wca[i] = mb_ptr2char_adv(&p);
- } else {
- wca[i] = *(p++);
- }
+ wca[i] = mb_ptr2char_adv(&p);
}
}
@@ -2216,11 +2209,7 @@ int ins_compl_add_infercase(char_u *str_arg, int len, bool icase, char_u *fname,
{
const char_u *p = compl_orig_text;
for (i = 0; i < min_len; i++) {
- if (has_mbyte) {
- c = mb_ptr2char_adv(&p);
- } else {
- c = *(p++);
- }
+ c = mb_ptr2char_adv(&p);
if (mb_islower(c)) {
has_lower = true;
if (mb_isupper(wca[i])) {
@@ -2241,11 +2230,7 @@ int ins_compl_add_infercase(char_u *str_arg, int len, bool icase, char_u *fname,
if (!has_lower) {
const char_u *p = compl_orig_text;
for (i = 0; i < min_len; i++) {
- if (has_mbyte) {
- c = mb_ptr2char_adv(&p);
- } else {
- c = *(p++);
- }
+ c = mb_ptr2char_adv(&p);
if (was_letter && mb_isupper(c) && mb_islower(wca[i])) {
// Rule 2 is satisfied.
for (i = actual_compl_length; i < actual_len; i++) {
@@ -2261,11 +2246,7 @@ int ins_compl_add_infercase(char_u *str_arg, int len, bool icase, char_u *fname,
{
const char_u *p = compl_orig_text;
for (i = 0; i < min_len; i++) {
- if (has_mbyte) {
- c = mb_ptr2char_adv(&p);
- } else {
- c = *(p++);
- }
+ c = mb_ptr2char_adv(&p);
if (mb_islower(c)) {
wca[i] = mb_tolower(wca[i]);
} else if (mb_isupper(c)) {
@@ -3059,12 +3040,9 @@ static void ins_compl_files(int count, char_u **files, int thesaurus, int flags,
*/
char_u *find_word_start(char_u *ptr)
{
- if (has_mbyte)
- while (*ptr != NUL && *ptr != '\n' && mb_get_class(ptr) <= 1)
- ptr += (*mb_ptr2len)(ptr);
- else
- while (*ptr != NUL && *ptr != '\n' && !vim_iswordc(*ptr))
- ++ptr;
+ while (*ptr != NUL && *ptr != '\n' && mb_get_class(ptr) <= 1) {
+ ptr += utfc_ptr2len(ptr);
+ }
return ptr;
}
@@ -3074,19 +3052,15 @@ char_u *find_word_start(char_u *ptr)
*/
char_u *find_word_end(char_u *ptr)
{
- int start_class;
-
- if (has_mbyte) {
- start_class = mb_get_class(ptr);
- if (start_class > 1)
- while (*ptr != NUL) {
- ptr += (*mb_ptr2len)(ptr);
- if (mb_get_class(ptr) != start_class)
- break;
+ const int start_class = mb_get_class(ptr);
+ if (start_class > 1) {
+ while (*ptr != NUL) {
+ ptr += utfc_ptr2len(ptr);
+ if (mb_get_class(ptr) != start_class) {
+ break;
}
- } else
- while (vim_iswordc(*ptr))
- ++ptr;
+ }
+ }
return ptr;
}
@@ -5557,10 +5531,9 @@ static void insert_special(int c, int allow_modmask, int ctrlv)
*/
# define ISSPECIAL(c) ((c) < ' ' || (c) >= DEL || (c) == '0' || (c) == '^')
-# define WHITECHAR(cc) (ascii_iswhite(cc) && \
- (!enc_utf8 || \
- !utf_iscomposing( \
- utf_ptr2char(get_cursor_pos_ptr() + 1))))
+#define WHITECHAR(cc) ( \
+ ascii_iswhite(cc) \
+ && !utf_iscomposing(utf_ptr2char(get_cursor_pos_ptr() + 1)))
/*
* "flags": INSCHAR_FORMAT - force formatting
@@ -5697,7 +5670,7 @@ void insertchar(
// Do the check for InsertCharPre before the call to vpeekc() because the
// InsertCharPre autocommand could change the input buffer.
if (!ISSPECIAL(c)
- && (!has_mbyte || (*mb_char2len)(c) == 1)
+ && (utf_char2len(c) == 1)
&& !has_event(EVENT_INSERTCHARPRE)
&& vpeekc() != NUL
&& !(State & REPLACE_FLAG)
@@ -7175,16 +7148,11 @@ static void replace_do_bs(int limit_col)
getvcol(curwin, &curwin->w_cursor, NULL, &start_vcol, NULL);
orig_vcols = chartabsize(get_cursor_pos_ptr(), start_vcol);
}
- if (has_mbyte) {
- (void)del_char_after_col(limit_col);
- if (l_State & VREPLACE_FLAG)
- orig_len = (int)STRLEN(get_cursor_pos_ptr());
- replace_push(cc);
- } else {
- pchar_cursor(cc);
- if (l_State & VREPLACE_FLAG)
- orig_len = (int)STRLEN(get_cursor_pos_ptr()) - 1;
+ (void)del_char_after_col(limit_col);
+ if (l_State & VREPLACE_FLAG) {
+ orig_len = (int)STRLEN(get_cursor_pos_ptr());
}
+ replace_push(cc);
replace_pop_ins();
if (l_State & VREPLACE_FLAG) {
@@ -7403,23 +7371,17 @@ bool in_cinkeys(int keytyped, int when, bool line_is_empty)
bool match = false;
if (keytyped == KEY_COMPLETE) {
- char_u *s;
+ char_u *n, *s;
/* Just completed a word, check if it starts with "look".
* search back for the start of a word. */
line = get_cursor_line_ptr();
- if (has_mbyte) {
- char_u *n;
-
- for (s = line + curwin->w_cursor.col; s > line; s = n) {
- n = mb_prevptr(line, s);
- if (!vim_iswordp(n))
- break;
+ for (s = line + curwin->w_cursor.col; s > line; s = n) {
+ n = mb_prevptr(line, s);
+ if (!vim_iswordp(n)) {
+ break;
}
- } else
- for (s = line + curwin->w_cursor.col; s > line; --s)
- if (!vim_iswordc(s[-1]))
- break;
+ }
assert(p >= look && (uintmax_t)(p - look) <= SIZE_MAX);
if (s + (p - look) <= line + curwin->w_cursor.col
&& (icase
@@ -8255,10 +8217,8 @@ static bool ins_bs(int c, int mode, int *inserted_space_p)
}
cc = gchar_cursor();
// look multi-byte character class
- if (has_mbyte) {
- prev_cclass = cclass;
- cclass = mb_get_class(get_cursor_pos_ptr());
- }
+ prev_cclass = cclass;
+ cclass = mb_get_class(get_cursor_pos_ptr());
if (mode == BACKSPACE_WORD && !ascii_isspace(cc)) { // start of word?
mode = BACKSPACE_WORD_NOT_SPACE;
temp = vim_iswordc(cc);
@@ -8272,19 +8232,18 @@ static bool ins_bs(int c, int mode, int *inserted_space_p)
}
break;
}
- if (State & REPLACE_FLAG)
+ if (State & REPLACE_FLAG) {
replace_do_bs(-1);
- else {
- const bool l_enc_utf8 = enc_utf8;
+ } else {
const int l_p_deco = p_deco;
- if (l_enc_utf8 && l_p_deco) {
+ if (l_p_deco) {
(void)utfc_ptr2char(get_cursor_pos_ptr(), cpc);
}
(void)del_char(false);
// If there are combining characters and 'delcombine' is set
// move the cursor back. Don't back up before the base
// character.
- if (l_enc_utf8 && l_p_deco && cpc[0] != NUL) {
+ if (l_p_deco && cpc[0] != NUL) {
inc_cursor();
}
if (revins_chars) {
@@ -8522,13 +8481,10 @@ static void ins_right(void)
AppendCharToRedobuff(K_RIGHT);
}
curwin->w_set_curswant = true;
- if (virtual_active())
+ if (virtual_active()) {
oneright();
- else {
- if (has_mbyte)
- curwin->w_cursor.col += (*mb_ptr2len)(get_cursor_pos_ptr());
- else
- ++curwin->w_cursor.col;
+ } else {
+ curwin->w_cursor.col += utfc_ptr2len(get_cursor_pos_ptr());
}
revins_legal++;
diff --git a/src/nvim/eval/funcs.c b/src/nvim/eval/funcs.c
index 679548ab91..eee40965e0 100644
--- a/src/nvim/eval/funcs.c
+++ b/src/nvim/eval/funcs.c
@@ -783,10 +783,10 @@ static void byteidx(typval_T *argvars, typval_T *rettv, int comp)
if (*t == NUL) { // EOL reached.
return;
}
- if (enc_utf8 && comp) {
+ if (comp) {
t += utf_ptr2len((const char_u *)t);
} else {
- t += (*mb_ptr2len)((const char_u *)t);
+ t += utfc_ptr2len((const char_u *)t);
}
}
rettv->vval.v_number = (varnumber_T)(t - str);
@@ -1427,9 +1427,7 @@ static void f_cursor(typval_T *argvars, typval_T *rettv, FunPtr fptr)
// Make sure the cursor is in a valid position.
check_cursor();
// Correct cursor for multi-byte character.
- if (has_mbyte) {
- mb_adjust_cursor();
- }
+ mb_adjust_cursor();
curwin->w_set_curswant = set_curswant;
rettv->vval.v_number = 0;
@@ -4198,7 +4196,7 @@ static void f_has(typval_T *argvars, typval_T *rettv, FunPtr fptr)
} else if (STRICMP(name, "ttyout") == 0) {
n = stdout_isatty;
} else if (STRICMP(name, "multi_byte_encoding") == 0) {
- n = has_mbyte != 0;
+ n = true;
} else if (STRICMP(name, "syntax_items") == 0) {
n = syntax_present(curwin);
#ifdef UNIX
@@ -8025,14 +8023,9 @@ static void f_setcharsearch(typval_T *argvars, typval_T *rettv, FunPtr fptr)
if ((d = argvars[0].vval.v_dict) != NULL) {
char_u *const csearch = (char_u *)tv_dict_get_string(d, "char", false);
if (csearch != NULL) {
- if (enc_utf8) {
- int pcc[MAX_MCO];
- int c = utfc_ptr2char(csearch, pcc);
- set_last_csearch(c, csearch, utfc_ptr2len(csearch));
- }
- else
- set_last_csearch(PTR2CHAR(csearch),
- csearch, utfc_ptr2len(csearch));
+ int pcc[MAX_MCO];
+ const int c = utfc_ptr2char(csearch, pcc);
+ set_last_csearch(c, csearch, utfc_ptr2len(csearch));
}
di = tv_dict_find(d, S_LEN("forward"));
@@ -10711,72 +10704,54 @@ static void f_tr(typval_T *argvars, typval_T *rettv, FunPtr fptr)
garray_T ga;
ga_init(&ga, (int)sizeof(char), 80);
- if (!has_mbyte) {
- // Not multi-byte: fromstr and tostr must be the same length.
- if (strlen(fromstr) != strlen(tostr)) {
- goto error;
- }
- }
-
// fromstr and tostr have to contain the same number of chars.
bool first = true;
while (*in_str != NUL) {
- if (has_mbyte) {
- const char *cpstr = in_str;
- const int inlen = (*mb_ptr2len)((const char_u *)in_str);
- int cplen = inlen;
- int idx = 0;
- int fromlen;
- for (const char *p = fromstr; *p != NUL; p += fromlen) {
- fromlen = (*mb_ptr2len)((const char_u *)p);
- if (fromlen == inlen && STRNCMP(in_str, p, inlen) == 0) {
- int tolen;
- for (p = tostr; *p != NUL; p += tolen) {
- tolen = (*mb_ptr2len)((const char_u *)p);
- if (idx-- == 0) {
- cplen = tolen;
- cpstr = (char *)p;
- break;
- }
- }
- if (*p == NUL) { // tostr is shorter than fromstr.
- goto error;
- }
- break;
- }
- idx++;
- }
-
- if (first && cpstr == in_str) {
- // Check that fromstr and tostr have the same number of
- // (multi-byte) characters. Done only once when a character
- // of in_str doesn't appear in fromstr.
- first = false;
+ const char *cpstr = in_str;
+ const int inlen = utfc_ptr2len((const char_u *)in_str);
+ int cplen = inlen;
+ int idx = 0;
+ int fromlen;
+ for (const char *p = fromstr; *p != NUL; p += fromlen) {
+ fromlen = utfc_ptr2len((const char_u *)p);
+ if (fromlen == inlen && STRNCMP(in_str, p, inlen) == 0) {
int tolen;
- for (const char *p = tostr; *p != NUL; p += tolen) {
- tolen = (*mb_ptr2len)((const char_u *)p);
- idx--;
+ for (p = tostr; *p != NUL; p += tolen) {
+ tolen = utfc_ptr2len((const char_u *)p);
+ if (idx-- == 0) {
+ cplen = tolen;
+ cpstr = (char *)p;
+ break;
+ }
}
- if (idx != 0) {
+ if (*p == NUL) { // tostr is shorter than fromstr.
goto error;
}
+ break;
}
+ idx++;
+ }
- ga_grow(&ga, cplen);
- memmove((char *)ga.ga_data + ga.ga_len, cpstr, (size_t)cplen);
- ga.ga_len += cplen;
-
- in_str += inlen;
- } else {
- // When not using multi-byte chars we can do it faster.
- const char *const p = strchr(fromstr, *in_str);
- if (p != NULL) {
- ga_append(&ga, tostr[p - fromstr]);
- } else {
- ga_append(&ga, *in_str);
+ if (first && cpstr == in_str) {
+ // Check that fromstr and tostr have the same number of
+ // (multi-byte) characters. Done only once when a character
+ // of in_str doesn't appear in fromstr.
+ first = false;
+ int tolen;
+ for (const char *p = tostr; *p != NUL; p += tolen) {
+ tolen = utfc_ptr2len((const char_u *)p);
+ idx--;
+ }
+ if (idx != 0) {
+ goto error;
}
- in_str++;
}
+
+ ga_grow(&ga, cplen);
+ memmove((char *)ga.ga_data + ga.ga_len, cpstr, (size_t)cplen);
+ ga.ga_len += cplen;
+
+ in_str += inlen;
}
// add a terminating NUL
diff --git a/src/nvim/ex_cmds.c b/src/nvim/ex_cmds.c
index 17afb33059..b8a0c3184b 100644
--- a/src/nvim/ex_cmds.c
+++ b/src/nvim/ex_cmds.c
@@ -795,10 +795,7 @@ void ex_retab(exarg_T *eap)
if (ptr[col] == NUL)
break;
vcol += chartabsize(ptr + col, (colnr_T)vcol);
- if (has_mbyte)
- col += (*mb_ptr2len)(ptr + col);
- else
- ++col;
+ col += utfc_ptr2len(ptr + col);
}
if (new_line == NULL) /* out of memory */
break;
@@ -3465,7 +3462,7 @@ static buf_T *do_sub(exarg_T *eap, proftime_T timeout,
int lastone;
long nmatch_tl = 0; // nr of lines matched below lnum
int do_again; // do it again after joining lines
- int skip_match = false;
+ bool skip_match = false;
linenr_T sub_firstlnum; // nr of first sub line
/*
@@ -3576,16 +3573,13 @@ static buf_T *do_sub(exarg_T *eap, proftime_T timeout,
if (matchcol == prev_matchcol
&& regmatch.endpos[0].lnum == 0
&& matchcol == regmatch.endpos[0].col) {
- if (sub_firstline[matchcol] == NUL)
- /* We already were at the end of the line. Don't look
- * for a match in this line again. */
- skip_match = TRUE;
- else {
- /* search for a match at next column */
- if (has_mbyte)
- matchcol += mb_ptr2len(sub_firstline + matchcol);
- else
- ++matchcol;
+ if (sub_firstline[matchcol] == NUL) {
+ // We already were at the end of the line. Don't look
+ // for a match in this line again.
+ skip_match = true;
+ } else {
+ // search for a match at next column
+ matchcol += mb_ptr2len(sub_firstline + matchcol);
}
// match will be pushed to preview_lines, bring it into a proper state
current_match.start.col = matchcol;
@@ -3609,7 +3603,7 @@ static buf_T *do_sub(exarg_T *eap, proftime_T timeout,
if (nmatch > 1) {
matchcol = (colnr_T)STRLEN(sub_firstline);
nmatch = 1;
- skip_match = TRUE;
+ skip_match = true;
}
sub_nsubs++;
did_sub = TRUE;
@@ -3779,7 +3773,7 @@ static buf_T *do_sub(exarg_T *eap, proftime_T timeout,
* get stuck when pressing 'n'. */
if (nmatch > 1) {
matchcol = (colnr_T)STRLEN(sub_firstline);
- skip_match = TRUE;
+ skip_match = true;
}
goto skip;
}
@@ -3956,8 +3950,8 @@ static buf_T *do_sub(exarg_T *eap, proftime_T timeout,
STRMOVE(new_start, p1 + 1);
p1 = new_start - 1;
}
- } else if (has_mbyte) {
- p1 += (*mb_ptr2len)(p1) - 1;
+ } else {
+ p1 += utfc_ptr2len(p1) - 1;
}
}
size_t new_endcol = STRLEN(new_start);
diff --git a/src/nvim/ex_docmd.c b/src/nvim/ex_docmd.c
index a491a9d377..fc699e8826 100644
--- a/src/nvim/ex_docmd.c
+++ b/src/nvim/ex_docmd.c
@@ -2505,12 +2505,8 @@ static void append_command(char_u *cmd)
STRCAT(IObuff, ": ");
d = IObuff + STRLEN(IObuff);
while (*s != NUL && d - IObuff < IOSIZE - 7) {
- if (
- enc_utf8 ? (s[0] == 0xc2 && s[1] == 0xa0) :
- *s == 0xa0) {
- s +=
- enc_utf8 ? 2 :
- 1;
+ if (s[0] == 0xc2 && s[1] == 0xa0) {
+ s += 2;
STRCPY(d, "<a0>");
d += 4;
} else
@@ -5564,7 +5560,8 @@ static char_u *uc_split_args(char_u *arg, size_t *lenp)
break;
len += 3; /* "," */
} else {
- int charlen = (*mb_ptr2len)(p);
+ const int charlen = utfc_ptr2len(p);
+
len += charlen;
p += charlen;
}
@@ -8266,12 +8263,10 @@ static void ex_normal(exarg_T *eap)
return;
}
- /*
- * vgetc() expects a CSI and K_SPECIAL to have been escaped. Don't do
- * this for the K_SPECIAL leading byte, otherwise special keys will not
- * work.
- */
- if (has_mbyte) {
+ // vgetc() expects a CSI and K_SPECIAL to have been escaped. Don't do
+ // this for the K_SPECIAL leading byte, otherwise special keys will not
+ // work.
+ {
int len = 0;
/* Count the number of characters to be escaped. */
@@ -8310,9 +8305,8 @@ static void ex_normal(exarg_T *eap)
check_cursor_moved(curwin);
}
- exec_normal_cmd(
- arg != NULL ? arg :
- eap->arg, eap->forceit ? REMAP_NONE : REMAP_YES, FALSE);
+ exec_normal_cmd(arg != NULL ? arg : eap->arg,
+ eap->forceit ? REMAP_NONE : REMAP_YES, false);
} while (eap->addr_count > 0 && eap->line1 <= eap->line2 && !got_int);
}
diff --git a/src/nvim/ex_getln.c b/src/nvim/ex_getln.c
index 53feffd2d7..9edb826ea6 100644
--- a/src/nvim/ex_getln.c
+++ b/src/nvim/ex_getln.c
@@ -2096,7 +2096,7 @@ static int command_line_handle_key(CommandLineState *s)
s->do_abbr = false; // don't do abbreviation now
ccline.special_char = NUL;
// may need to remove ^ when composing char was typed
- if (enc_utf8 && utf_iscomposing(s->c) && !cmd_silent) {
+ if (utf_iscomposing(s->c) && !cmd_silent) {
if (ui_has(kUICmdline)) {
// TODO(bfredl): why not make unputcmdline also work with true?
unputcmdline();
@@ -2143,9 +2143,8 @@ static int command_line_handle_key(CommandLineState *s)
if (s->do_abbr && (IS_SPECIAL(s->c) || !vim_iswordc(s->c))
// Add ABBR_OFF for characters above 0x100, this is
// what check_abbr() expects.
- && (ccheck_abbr((has_mbyte && s->c >= 0x100) ?
- (s->c + ABBR_OFF) : s->c)
- || s->c == Ctrl_RSB)) {
+ && (ccheck_abbr((s->c >= 0x100) ? (s->c + ABBR_OFF) : s->c)
+ || s->c == Ctrl_RSB)) {
return command_line_changed(s);
}
@@ -2254,7 +2253,7 @@ static int command_line_changed(CommandLineState *s)
may_do_incsearch_highlighting(s->firstc, s->count, &s->is_state);
}
- if (cmdmsg_rl || (p_arshape && !p_tbidi && enc_utf8)) {
+ if (cmdmsg_rl || (p_arshape && !p_tbidi)) {
// Always redraw the whole command line to fix shaping and
// right-left typing. Not efficient, but it works.
// Do it only when there are no characters left to read
@@ -3139,11 +3138,9 @@ static void draw_cmdline(int start, int len)
if (cmdline_star > 0) {
for (int i = 0; i < len; i++) {
msg_putchar('*');
- if (has_mbyte) {
- i += (*mb_ptr2len)(ccline.cmdbuff + start + i) - 1;
- }
+ i += utfc_ptr2len(ccline.cmdbuff + start + i) - 1;
}
- } else if (p_arshape && !p_tbidi && enc_utf8 && len > 0) {
+ } else if (p_arshape && !p_tbidi && len > 0) {
bool do_arabicshape = false;
int mb_l;
for (int i = start; i < start + len; i += mb_l) {
@@ -3439,32 +3436,31 @@ void put_on_cmdline(char_u *str, int len, int redraw)
(size_t)(ccline.cmdlen - ccline.cmdpos));
ccline.cmdlen += len;
} else {
- if (has_mbyte) {
- /* Count nr of characters in the new string. */
- m = 0;
- for (i = 0; i < len; i += (*mb_ptr2len)(str + i))
- ++m;
- /* Count nr of bytes in cmdline that are overwritten by these
- * characters. */
- for (i = ccline.cmdpos; i < ccline.cmdlen && m > 0;
- i += (*mb_ptr2len)(ccline.cmdbuff + i))
- --m;
- if (i < ccline.cmdlen) {
- memmove(ccline.cmdbuff + ccline.cmdpos + len,
- ccline.cmdbuff + i, (size_t)(ccline.cmdlen - i));
- ccline.cmdlen += ccline.cmdpos + len - i;
- } else
- ccline.cmdlen = ccline.cmdpos + len;
- } else if (ccline.cmdpos + len > ccline.cmdlen)
+ // Count nr of characters in the new string.
+ m = 0;
+ for (i = 0; i < len; i += utfc_ptr2len(str + i)) {
+ m++;
+ }
+ // Count nr of bytes in cmdline that are overwritten by these
+ // characters.
+ for (i = ccline.cmdpos; i < ccline.cmdlen && m > 0;
+ i += utfc_ptr2len(ccline.cmdbuff + i)) {
+ m--;
+ }
+ if (i < ccline.cmdlen) {
+ memmove(ccline.cmdbuff + ccline.cmdpos + len,
+ ccline.cmdbuff + i, (size_t)(ccline.cmdlen - i));
+ ccline.cmdlen += ccline.cmdpos + len - i;
+ } else {
ccline.cmdlen = ccline.cmdpos + len;
+ }
}
memmove(ccline.cmdbuff + ccline.cmdpos, str, (size_t)len);
ccline.cmdbuff[ccline.cmdlen] = NUL;
- if (enc_utf8) {
- /* When the inserted text starts with a composing character,
- * backup to the character before it. There could be two of them.
- */
+ {
+ // When the inserted text starts with a composing character,
+ // backup to the character before it. There could be two of them.
i = 0;
c = utf_ptr2char(ccline.cmdbuff + ccline.cmdpos);
while (ccline.cmdpos > 0 && utf_iscomposing(c)) {
@@ -3515,23 +3511,19 @@ void put_on_cmdline(char_u *str, int len, int redraw)
for (i = 0; i < len; i++) {
c = cmdline_charsize(ccline.cmdpos);
// count ">" for a double-wide char that doesn't fit.
- if (has_mbyte) {
- correct_screencol(ccline.cmdpos, c, &ccline.cmdspos);
- }
+ correct_screencol(ccline.cmdpos, c, &ccline.cmdspos);
// Stop cursor at the end of the screen, but do increment the
// insert position, so that entering a very long command
// works, even though you can't see it.
if (ccline.cmdspos + c < m) {
ccline.cmdspos += c;
}
- if (has_mbyte) {
- c = (*mb_ptr2len)(ccline.cmdbuff + ccline.cmdpos) - 1;
- if (c > len - i - 1) {
- c = len - i - 1;
- }
- ccline.cmdpos += c;
- i += c;
+ c = utfc_ptr2len(ccline.cmdbuff + ccline.cmdpos) - 1;
+ if (c > len - i - 1) {
+ c = len - i - 1;
}
+ ccline.cmdpos += c;
+ i += c;
ccline.cmdpos++;
}
@@ -3676,11 +3668,7 @@ void cmdline_paste_str(char_u *s, int literally)
if (cv == Ctrl_V && s[1]) {
s++;
}
- if (has_mbyte) {
- c = mb_cptr2char_adv((const char_u **)&s);
- } else {
- c = *s++;
- }
+ c = mb_cptr2char_adv((const char_u **)&s);
if (cv == Ctrl_V || c == ESC || c == Ctrl_C
|| c == CAR || c == NL || c == Ctrl_L
|| (c == Ctrl_BSL && *s == Ctrl_N)) {
diff --git a/src/nvim/fileio.c b/src/nvim/fileio.c
index e349f00fba..49de0f3bf8 100644
--- a/src/nvim/fileio.c
+++ b/src/nvim/fileio.c
@@ -910,20 +910,18 @@ retry:
/* "ucs-bom" means we need to check the first bytes of the file
* for a BOM. */
- if (STRCMP(fenc, ENC_UCSBOM) == 0)
+ if (STRCMP(fenc, ENC_UCSBOM) == 0) {
fio_flags = FIO_UCSBOM;
-
- /*
- * Check if UCS-2/4 or Latin1 to UTF-8 conversion needs to be
- * done. This is handled below after read(). Prepare the
- * fio_flags to avoid having to parse the string each time.
- * Also check for Unicode to Latin1 conversion, because iconv()
- * appears not to handle this correctly. This works just like
- * conversion to UTF-8 except how the resulting character is put in
- * the buffer.
- */
- else if (enc_utf8 || STRCMP(p_enc, "latin1") == 0)
+ } else {
+ // Check if UCS-2/4 or Latin1 to UTF-8 conversion needs to be
+ // done. This is handled below after read(). Prepare the
+ // fio_flags to avoid having to parse the string each time.
+ // Also check for Unicode to Latin1 conversion, because iconv()
+ // appears not to handle this correctly. This works just like
+ // conversion to UTF-8 except how the resulting character is put in
+ // the buffer.
fio_flags = get_fio_flags(fenc);
+ }
@@ -932,8 +930,7 @@ retry:
if (fio_flags == 0
&& !did_iconv
) {
- iconv_fd = (iconv_t)my_iconv_open(
- enc_utf8 ? (char_u *)"utf-8" : p_enc, fenc);
+ iconv_fd = (iconv_t)my_iconv_open((char_u *)"utf-8", fenc);
}
# endif
@@ -1202,7 +1199,7 @@ retry:
&& (fio_flags == FIO_UCSBOM
|| (!curbuf->b_p_bomb
&& tmpname == NULL
- && (*fenc == 'u' || (*fenc == NUL && enc_utf8))))) {
+ && (*fenc == 'u' || *fenc == NUL)))) {
char_u *ccname;
int blen;
@@ -1468,8 +1465,8 @@ retry:
memmove(line_start, buffer, (size_t)linerest);
size = (long)((ptr + real_size) - dest);
ptr = dest;
- } else if (enc_utf8 && !curbuf->b_p_bin) {
- int incomplete_tail = FALSE;
+ } else if (!curbuf->b_p_bin) {
+ bool incomplete_tail = false;
// Reading UTF-8: Check if the bytes are valid UTF-8.
for (p = ptr;; p++) {
@@ -1486,15 +1483,16 @@ retry:
// then.
l = utf_ptr2len_len(p, todo);
if (l > todo && !incomplete_tail) {
- /* Avoid retrying with a different encoding when
- * a truncated file is more likely, or attempting
- * to read the rest of an incomplete sequence when
- * we have already done so. */
- if (p > ptr || filesize > 0)
- incomplete_tail = TRUE;
- /* Incomplete byte sequence, move it to conv_rest[]
- * and try to read the rest of it, unless we've
- * already done so. */
+ // Avoid retrying with a different encoding when
+ // a truncated file is more likely, or attempting
+ // to read the rest of an incomplete sequence when
+ // we have already done so.
+ if (p > ptr || filesize > 0) {
+ incomplete_tail = true;
+ }
+ // Incomplete byte sequence, move it to conv_rest[]
+ // and try to read the rest of it, unless we've
+ // already done so.
if (p > ptr) {
conv_restlen = todo;
memmove(conv_rest, p, conv_restlen);
@@ -2165,8 +2163,8 @@ readfile_charconvert (
else {
close(*fdp); /* close the input file, ignore errors */
*fdp = -1;
- if (eval_charconvert((char *) fenc, enc_utf8 ? "utf-8" : (char *) p_enc,
- (char *) fname, (char *) tmpname) == FAIL) {
+ if (eval_charconvert((char *)fenc, "utf-8",
+ (char *)fname, (char *)tmpname) == FAIL) {
errmsg = (char_u *)_("Conversion with 'charconvert' failed");
}
if (errmsg == NULL && (*fdp = os_open((char *)tmpname, O_RDONLY, 0)) < 0) {
@@ -3067,7 +3065,7 @@ nobackup:
// Check if UTF-8 to UCS-2/4 or Latin1 conversion needs to be done. Or
// Latin1 to Unicode conversion. This is handled in buf_write_bytes().
// Prepare the flags for it and allocate bw_conv_buf when needed.
- if (converted && (enc_utf8 || STRCMP(p_enc, "latin1") == 0)) {
+ if (converted) {
wb_flags = get_fio_flags(fenc);
if (wb_flags & (FIO_UCS2 | FIO_UCS4 | FIO_UTF16 | FIO_UTF8)) {
// Need to allocate a buffer to translate into.
@@ -3089,8 +3087,7 @@ nobackup:
# ifdef HAVE_ICONV
// Use iconv() conversion when conversion is needed and it's not done
// internally.
- write_info.bw_iconv_fd = (iconv_t)my_iconv_open(fenc,
- enc_utf8 ? (char_u *)"utf-8" : p_enc);
+ write_info.bw_iconv_fd = (iconv_t)my_iconv_open(fenc, (char_u *)"utf-8");
if (write_info.bw_iconv_fd != (iconv_t)-1) {
/* We're going to use iconv(), allocate a buffer to convert in. */
write_info.bw_conv_buflen = bufsize * ICONV_MULT;
@@ -3433,7 +3430,7 @@ restore_backup:
// The file was written to a temp file, now it needs to be converted
// with 'charconvert' to (overwrite) the output file.
if (end != 0) {
- if (eval_charconvert(enc_utf8 ? "utf-8" : (char *)p_enc, (char *)fenc,
+ if (eval_charconvert("utf-8", (char *)fenc,
(char *)wfname, (char *)fname) == FAIL) {
write_info.bw_conv_error = true;
end = 0;
@@ -4189,7 +4186,7 @@ static bool need_conversion(const char_u *fenc)
/* Encodings differ. However, conversion is not needed when 'enc' is any
* Unicode encoding and the file is UTF-8. */
- return !(enc_utf8 && fenc_flags == FIO_UTF8);
+ return !(fenc_flags == FIO_UTF8);
}
/// Return the FIO_ flags needed for the internal conversion if 'name' was
diff --git a/src/nvim/fold.c b/src/nvim/fold.c
index 24a73a5b9f..5e28ca6538 100644
--- a/src/nvim/fold.c
+++ b/src/nvim/fold.c
@@ -1037,11 +1037,11 @@ void foldAdjustVisual(void)
if (hasFolding(end->lnum, NULL, &end->lnum)) {
ptr = ml_get(end->lnum);
end->col = (colnr_T)STRLEN(ptr);
- if (end->col > 0 && *p_sel == 'o')
- --end->col;
- /* prevent cursor from moving on the trail byte */
- if (has_mbyte)
- mb_adjust_cursor();
+ if (end->col > 0 && *p_sel == 'o') {
+ end->col--;
+ }
+ // prevent cursor from moving on the trail byte
+ mb_adjust_cursor();
}
}
diff --git a/src/nvim/getchar.c b/src/nvim/getchar.c
index 456979be00..a5c81b2795 100644
--- a/src/nvim/getchar.c
+++ b/src/nvim/getchar.c
@@ -563,9 +563,7 @@ void AppendToRedobuffLit(const char_u *str, int len)
// Handle a special or multibyte character.
// Composing chars separately are handled separately.
- const int c = (has_mbyte
- ? mb_cptr2char_adv((const char_u **)&s)
- : (uint8_t)(*s++));
+ const int c = mb_cptr2char_adv((const char_u **)&s);
if (c < ' ' || c == DEL || (*s == NUL && (c == '0' || c == '^'))) {
add_char_buff(&redobuff, Ctrl_V);
}
@@ -684,15 +682,16 @@ static int read_redo(bool init, bool old_redo)
if ((c = *p) == NUL) {
return c;
}
- /* Reverse the conversion done by add_char_buff() */
- /* For a multi-byte character get all the bytes and return the
- * converted character. */
- if (has_mbyte && (c != K_SPECIAL || p[1] == KS_SPECIAL))
+ // Reverse the conversion done by add_char_buff()
+ // For a multi-byte character get all the bytes and return the
+ // converted character.
+ if (c != K_SPECIAL || p[1] == KS_SPECIAL) {
n = MB_BYTE2LEN_CHECK(c);
- else
+ } else {
n = 1;
- for (i = 0;; ++i) {
- if (c == K_SPECIAL) { /* special key or escaped K_SPECIAL */
+ }
+ for (i = 0;; i++) {
+ if (c == K_SPECIAL) { // special key or escaped K_SPECIAL
c = TO_SPECIAL(p[1], p[2]);
p += 2;
}
@@ -2161,14 +2160,11 @@ static int vgetorpeek(bool advance)
col = vcol = curwin->w_wcol = 0;
ptr = get_cursor_line_ptr();
while (col < curwin->w_cursor.col) {
- if (!ascii_iswhite(ptr[col]))
+ if (!ascii_iswhite(ptr[col])) {
curwin->w_wcol = vcol;
- vcol += lbr_chartabsize(ptr, ptr + col,
- (colnr_T)vcol);
- if (has_mbyte)
- col += (*mb_ptr2len)(ptr + col);
- else
- ++col;
+ }
+ vcol += lbr_chartabsize(ptr, ptr + col, (colnr_T)vcol);
+ col += utfc_ptr2len(ptr + col);
}
curwin->w_wrow = curwin->w_cline_row
+ curwin->w_wcol / curwin->w_width_inner;
@@ -2813,33 +2809,23 @@ int buf_do_map(int maptype, MapArguments *args, int mode, bool is_abbrev,
// Otherwise we won't be able to find the start of it in a
// vi-compatible way.
//
- if (has_mbyte) {
- int first, last;
- int same = -1;
-
- first = vim_iswordp(lhs);
- last = first;
- p = lhs + (*mb_ptr2len)(lhs);
- n = 1;
- while (p < lhs + len) {
- n++; // nr of (multi-byte) chars
- last = vim_iswordp(p); // type of last char
- if (same == -1 && last != first) {
- same = n - 1; // count of same char type
- }
- p += (*mb_ptr2len)(p);
- }
- if (last && n > 2 && same >= 0 && same < n - 1) {
- retval = 1;
- goto theend;
+ int same = -1;
+
+ const int first = vim_iswordp(lhs);
+ int last = first;
+ p = lhs + utfc_ptr2len(lhs);
+ n = 1;
+ while (p < lhs + len) {
+ n++; // nr of (multi-byte) chars
+ last = vim_iswordp(p); // type of last char
+ if (same == -1 && last != first) {
+ same = n - 1; // count of same char type
}
- } else if (vim_iswordc(lhs[len - 1])) { // ends in keyword char
- for (n = 0; n < len - 2; n++) {
- if (vim_iswordc(lhs[n]) != vim_iswordc(lhs[len - 2])) {
- retval = 1;
- goto theend;
- }
- } // for
+ p += (*mb_ptr2len)(p);
+ }
+ if (last && n > 2 && same >= 0 && same < n - 1) {
+ retval = 1;
+ goto theend;
}
// An abbreviation cannot contain white space.
for (n = 0; n < len; n++) {
@@ -3700,25 +3686,23 @@ int ExpandMappings(regmatch_T *regmatch, int *num_file, char_u ***file)
return count == 0 ? FAIL : OK;
}
-/*
- * Check for an abbreviation.
- * Cursor is at ptr[col].
- * When inserting, mincol is where insert started.
- * For the command line, mincol is what is to be skipped over.
- * "c" is the character typed before check_abbr was called. It may have
- * ABBR_OFF added to avoid prepending a CTRL-V to it.
- *
- * Historic vi practice: The last character of an abbreviation must be an id
- * character ([a-zA-Z0-9_]). The characters in front of it must be all id
- * characters or all non-id characters. This allows for abbr. "#i" to
- * "#include".
- *
- * Vim addition: Allow for abbreviations that end in a non-keyword character.
- * Then there must be white space before the abbr.
- *
- * return TRUE if there is an abbreviation, FALSE if not
- */
-int check_abbr(int c, char_u *ptr, int col, int mincol)
+// Check for an abbreviation.
+// Cursor is at ptr[col].
+// When inserting, mincol is where insert started.
+// For the command line, mincol is what is to be skipped over.
+// "c" is the character typed before check_abbr was called. It may have
+// ABBR_OFF added to avoid prepending a CTRL-V to it.
+//
+// Historic vi practice: The last character of an abbreviation must be an id
+// character ([a-zA-Z0-9_]). The characters in front of it must be all id
+// characters or all non-id characters. This allows for abbr. "#i" to
+// "#include".
+//
+// Vim addition: Allow for abbreviations that end in a non-keyword character.
+// Then there must be white space before the abbr.
+//
+// Return true if there is an abbreviation, false if not.
+bool check_abbr(int c, char_u *ptr, int col, int mincol)
{
int len;
int scol; /* starting column of the abbr. */
@@ -3727,36 +3711,36 @@ int check_abbr(int c, char_u *ptr, int col, int mincol)
char_u tb[MB_MAXBYTES + 4];
mapblock_T *mp;
mapblock_T *mp2;
- int clen = 0; /* length in characters */
- int is_id = TRUE;
- int vim_abbr;
-
- if (typebuf.tb_no_abbr_cnt) /* abbrev. are not recursive */
- return FALSE;
+ int clen = 0; // length in characters
+ bool is_id = true;
- /* no remapping implies no abbreviation, except for CTRL-] */
- if ((KeyNoremap & (RM_NONE|RM_SCRIPT)) != 0 && c != Ctrl_RSB)
- return FALSE;
+ if (typebuf.tb_no_abbr_cnt) { // abbrev. are not recursive
+ return false;
+ }
- /*
- * Check for word before the cursor: If it ends in a keyword char all
- * chars before it must be keyword chars or non-keyword chars, but not
- * white space. If it ends in a non-keyword char we accept any characters
- * before it except white space.
- */
- if (col == 0) /* cannot be an abbr. */
- return FALSE;
+ // no remapping implies no abbreviation, except for CTRL-]
+ if ((KeyNoremap & (RM_NONE|RM_SCRIPT)) != 0 && c != Ctrl_RSB) {
+ return false;
+ }
- if (has_mbyte) {
- char_u *p;
+ // Check for word before the cursor: If it ends in a keyword char all
+ // chars before it must be keyword chars or non-keyword chars, but not
+ // white space. If it ends in a non-keyword char we accept any characters
+ // before it except white space.
+ if (col == 0) { // cannot be an abbr.
+ return false;
+ }
- p = mb_prevptr(ptr, ptr + col);
- if (!vim_iswordp(p))
- vim_abbr = TRUE; /* Vim added abbr. */
- else {
- vim_abbr = FALSE; /* vi compatible abbr. */
- if (p > ptr)
+ {
+ bool vim_abbr;
+ char_u *p = mb_prevptr(ptr, ptr + col);
+ if (!vim_iswordp(p)) {
+ vim_abbr = true; // Vim added abbr.
+ } else {
+ vim_abbr = false; // vi compatible abbr.
+ if (p > ptr) {
is_id = vim_iswordp(mb_prevptr(ptr, p));
+ }
}
clen = 1;
while (p > ptr + mincol) {
@@ -3768,17 +3752,6 @@ int check_abbr(int c, char_u *ptr, int col, int mincol)
++clen;
}
scol = (int)(p - ptr);
- } else {
- if (!vim_iswordc(ptr[col - 1]))
- vim_abbr = TRUE; /* Vim added abbr. */
- else {
- vim_abbr = FALSE; /* vi compatible abbr. */
- if (col > 1)
- is_id = vim_iswordc(ptr[col - 2]);
- }
- for (scol = col - 1; scol > 0 && !ascii_isspace(ptr[scol - 1])
- && (vim_abbr || is_id == vim_iswordc(ptr[scol - 1])); --scol)
- ;
}
if (scol < mincol)
@@ -3866,14 +3839,14 @@ int check_abbr(int c, char_u *ptr, int col, int mincol)
tb[0] = Ctrl_H;
tb[1] = NUL;
- if (has_mbyte)
- len = clen; /* Delete characters instead of bytes */
- while (len-- > 0) /* delete the from string */
- (void)ins_typebuf(tb, 1, 0, TRUE, mp->m_silent);
- return TRUE;
+ len = clen; // Delete characters instead of bytes
+ while (len-- > 0) { // delete the from string
+ (void)ins_typebuf(tb, 1, 0, true, mp->m_silent);
+ }
+ return true;
}
}
- return FALSE;
+ return false;
}
/*
diff --git a/src/nvim/memline.c b/src/nvim/memline.c
index 57ed0d6588..70225484ec 100644
--- a/src/nvim/memline.c
+++ b/src/nvim/memline.c
@@ -4152,9 +4152,7 @@ void goto_byte(long cnt)
check_cursor();
// Make sure the cursor is on the first byte of a multi-byte char.
- if (has_mbyte) {
- mb_adjust_cursor();
- }
+ mb_adjust_cursor();
}
/// Increment the line pointer "lp" crossing line boundaries as necessary.
diff --git a/src/nvim/message.c b/src/nvim/message.c
index 06ba607323..f76a408481 100644
--- a/src/nvim/message.c
+++ b/src/nvim/message.c
@@ -400,12 +400,12 @@ void trunc_string(char_u *s, char_u *buf, int room_in, int buflen)
}
len += n;
buf[e] = s[e];
- if (has_mbyte)
- for (n = (*mb_ptr2len)(s + e); --n > 0; ) {
- if (++e == buflen)
- break;
- buf[e] = s[e];
+ for (n = utfc_ptr2len(s + e); --n > 0; ) {
+ if (++e == buflen) {
+ break;
}
+ buf[e] = s[e];
+ }
}
// Last part: End of the string.
@@ -873,19 +873,17 @@ char_u *msg_may_trunc(int force, char_u *s)
room = (int)(Rows - cmdline_row - 1) * Columns + sc_col - 1;
if ((force || (shortmess(SHM_TRUNC) && !exmode_active))
&& (n = (int)STRLEN(s) - room) > 0) {
- if (has_mbyte) {
- int size = vim_strsize(s);
-
- /* There may be room anyway when there are multibyte chars. */
- if (size <= room)
- return s;
+ int size = vim_strsize(s);
- for (n = 0; size >= room; ) {
- size -= utf_ptr2cells(s + n);
- n += utfc_ptr2len(s + n);
- }
- --n;
+ // There may be room anyway when there are multibyte chars.
+ if (size <= room) {
+ return s;
+ }
+ for (n = 0; size >= room; ) {
+ size -= utf_ptr2cells(s + n);
+ n += utfc_ptr2len(s + n);
}
+ n--;
s += n;
*s = '<';
}
@@ -1430,7 +1428,7 @@ int msg_outtrans_len_attr(const char_u *msgstr, int len, int attr)
// If the string starts with a composing character first draw a space on
// which the composing char can be drawn.
- if (enc_utf8 && utf_iscomposing(utf_ptr2char(msgstr))) {
+ if (utf_iscomposing(utf_ptr2char(msgstr))) {
msg_puts_attr(" ", attr);
}
@@ -2489,8 +2487,9 @@ static void t_puts(int *t_col, const char_u *t_s, const char_u *s, int attr)
*t_col = 0;
/* If the string starts with a composing character don't increment the
* column position for it. */
- if (enc_utf8 && utf_iscomposing(utf_ptr2char(t_s)))
- --msg_col;
+ if (utf_iscomposing(utf_ptr2char(t_s))) {
+ msg_col--;
+ }
if (msg_col >= Columns) {
msg_col = 0;
++msg_row;
@@ -3391,12 +3390,12 @@ do_dialog (
* Copy one character from "*from" to "*to", taking care of multi-byte
* characters. Return the length of the character in bytes.
*/
-static int
-copy_char (
- char_u *from,
+static int copy_char(
+ const char_u *from,
char_u *to,
- int lowercase /* make character lower case */
+ bool lowercase // make character lower case
)
+ FUNC_ATTR_NONNULL_ALL
{
if (lowercase) {
int c = mb_tolower(utf_ptr2char(from));
@@ -3408,7 +3407,7 @@ copy_char (
}
#define HAS_HOTKEY_LEN 30
-#define HOTK_LEN (has_mbyte ? MB_MAXBYTES : 1)
+#define HOTK_LEN MB_MAXBYTES
/// Allocates memory for dialog string & for storing hotkeys
///
@@ -3512,7 +3511,7 @@ static void copy_hotkeys_and_msg(const char_u *message, char_u *buttons,
// Define first default hotkey. Keep the hotkey string NUL
// terminated to avoid reading past the end.
- hotkeys_ptr[copy_char(buttons, hotkeys_ptr, TRUE)] = NUL;
+ hotkeys_ptr[copy_char(buttons, hotkeys_ptr, true)] = NUL;
// Remember where the choices start, displaying starts here when
// "hotkeys_ptr" typed at the more prompt.
@@ -3532,8 +3531,8 @@ static void copy_hotkeys_and_msg(const char_u *message, char_u *buttons,
*msgp++ = ' '; // '\n' -> ', '
// Advance to next hotkey and set default hotkey
- hotkeys_ptr += (has_mbyte) ? STRLEN(hotkeys_ptr): 1;
- hotkeys_ptr[copy_char(r + 1, hotkeys_ptr, TRUE)] = NUL;
+ hotkeys_ptr += STRLEN(hotkeys_ptr);
+ hotkeys_ptr[copy_char(r + 1, hotkeys_ptr, true)] = NUL;
if (default_button_idx) {
default_button_idx--;
@@ -3555,15 +3554,15 @@ static void copy_hotkeys_and_msg(const char_u *message, char_u *buttons,
} else {
// '&a' -> '[a]'
*msgp++ = (default_button_idx == 1) ? '[' : '(';
- msgp += copy_char(r, msgp, FALSE);
+ msgp += copy_char(r, msgp, false);
*msgp++ = (default_button_idx == 1) ? ']' : ')';
// redefine hotkey
- hotkeys_ptr[copy_char(r, hotkeys_ptr, TRUE)] = NUL;
+ hotkeys_ptr[copy_char(r, hotkeys_ptr, true)] = NUL;
}
} else {
// everything else copy literally
- msgp += copy_char(r, msgp, FALSE);
+ msgp += copy_char(r, msgp, false);
}
// advance to the next character
diff --git a/src/nvim/move.c b/src/nvim/move.c
index ccd19a81de..fdcf6bb189 100644
--- a/src/nvim/move.c
+++ b/src/nvim/move.c
@@ -717,11 +717,9 @@ int curwin_col_off2(void)
return win_col_off2(curwin);
}
-/*
- * compute curwin->w_wcol and curwin->w_virtcol.
- * Also updates curwin->w_wrow and curwin->w_cline_row.
- * Also updates curwin->w_leftcol.
- */
+// Compute curwin->w_wcol and curwin->w_virtcol.
+// Also updates curwin->w_wrow and curwin->w_cline_row.
+// Also updates curwin->w_leftcol.
void curs_columns(
int may_scroll /* when true, may scroll horizontally */
)
diff --git a/src/nvim/option.c b/src/nvim/option.c
index fcc051ef1a..0a91687352 100644
--- a/src/nvim/option.c
+++ b/src/nvim/option.c
@@ -1725,14 +1725,15 @@ int do_set(
#endif
)
arg++; // remove backslash
- if (has_mbyte
- && (i = (*mb_ptr2len)(arg)) > 1) {
+ i = utfc_ptr2len(arg);
+ if (i > 1) {
// copy multibyte char
memmove(s, arg, (size_t)i);
arg += i;
s += i;
- } else
+ } else {
*s++ = *arg++;
+ }
}
*s = NUL;
@@ -2864,39 +2865,26 @@ ambw_end:
errmsg = e_invarg;
}
} else if (gvarp == &p_mps) { // 'matchpairs'
- if (has_mbyte) {
- for (p = *varp; *p != NUL; p++) {
- int x2 = -1;
- int x3 = -1;
+ for (p = *varp; *p != NUL; p++) {
+ int x2 = -1;
+ int x3 = -1;
- if (*p != NUL) {
- p += mb_ptr2len(p);
- }
- if (*p != NUL) {
- x2 = *p++;
- }
- if (*p != NUL) {
- x3 = utf_ptr2char(p);
- p += mb_ptr2len(p);
- }
- if (x2 != ':' || x3 == -1 || (*p != NUL && *p != ',')) {
- errmsg = e_invarg;
- break;
- }
- if (*p == NUL) {
- break;
- }
+ if (*p != NUL) {
+ p += utfc_ptr2len(p);
}
- } else {
- // Check for "x:y,x:y"
- for (p = *varp; *p != NUL; p += 4) {
- if (p[1] != ':' || p[2] == NUL || (p[3] != NUL && p[3] != ',')) {
- errmsg = e_invarg;
- break;
- }
- if (p[3] == NUL) {
- break;
- }
+ if (*p != NUL) {
+ x2 = *p++;
+ }
+ if (*p != NUL) {
+ x3 = utf_ptr2char(p);
+ p += utfc_ptr2len(p);
+ }
+ if (x2 != ':' || x3 == -1 || (*p != NUL && *p != ',')) {
+ errmsg = e_invarg;
+ break;
+ }
+ if (*p == NUL) {
+ break;
}
}
} else if (gvarp == &p_com) { // 'comments'
diff --git a/src/nvim/path.c b/src/nvim/path.c
index 31318f6bea..793f917f06 100644
--- a/src/nvim/path.c
+++ b/src/nvim/path.c
@@ -260,13 +260,13 @@ char_u *shorten_dir(char_u *str)
*d++ = *s;
skip = false;
} else if (!skip) {
- *d++ = *s; /* copy next char */
- if (*s != '~' && *s != '.') /* and leading "~" and "." */
+ *d++ = *s; // copy next char
+ if (*s != '~' && *s != '.') { // and leading "~" and "."
skip = true;
- if (has_mbyte) {
- int l = mb_ptr2len(s);
- while (--l > 0)
- *d++ = *++s;
+ }
+ int l = utfc_ptr2len(s);
+ while (--l > 0) {
+ *d++ = *++s;
}
}
}
@@ -608,13 +608,10 @@ static size_t do_path_expand(garray_T *gap, const char_u *path,
)) {
e = p;
}
- if (has_mbyte) {
- len = (size_t)(*mb_ptr2len)(path_end);
- memcpy(p, path_end, len);
- p += len;
- path_end += len;
- } else
- *p++ = *path_end++;
+ len = (size_t)(utfc_ptr2len(path_end));
+ memcpy(p, path_end, len);
+ p += len;
+ path_end += len;
}
e = p;
*e = NUL;
diff --git a/src/nvim/regexp.c b/src/nvim/regexp.c
index 6316129c6a..1c88bd4ba4 100644
--- a/src/nvim/regexp.c
+++ b/src/nvim/regexp.c
@@ -797,8 +797,7 @@ static int get_equi_class(char_u **pp)
*/
static void reg_equi_class(int c)
{
- if (enc_utf8 || STRCMP(p_enc, "latin1") == 0
- || STRCMP(p_enc, "iso-8859-15") == 0) {
+ {
switch (c) {
// Do not use '\300' style, it results in a negative number.
case 'A': case 0xc0: case 0xc1: case 0xc2:
@@ -1141,7 +1140,7 @@ static char_u *skip_anyof(char_u *p)
if (*p == ']' || *p == '-')
++p;
while (*p != NUL && *p != ']') {
- if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1) {
+ if ((l = (*mb_ptr2len)(p)) > 1) {
p += l;
} else if (*p == '-') {
p++;
@@ -1876,7 +1875,7 @@ static char_u *regatom(int *flagp)
EMSG_RET_NULL(_("E63: invalid use of \\_"));
/* When '.' is followed by a composing char ignore the dot, so that
* the composing char is matched here. */
- if (enc_utf8 && c == Magic('.') && utf_iscomposing(peekchr())) {
+ if (c == Magic('.') && utf_iscomposing(peekchr())) {
c = getchr();
goto do_multibyte;
}
@@ -2242,11 +2241,7 @@ collection:
if (*regparse == '[')
endc = get_coll_element(&regparse);
if (endc == 0) {
- if (has_mbyte) {
- endc = mb_ptr2char_adv((const char_u **)&regparse);
- } else {
- endc = *regparse++;
- }
+ endc = mb_ptr2char_adv((const char_u **)&regparse);
}
/* Handle \o40, \x20 and \u20AC style sequences */
@@ -2256,8 +2251,8 @@ collection:
if (startc > endc) {
EMSG_RET_NULL(_(e_reverse_range));
}
- if (has_mbyte && ((*mb_char2len)(startc) > 1
- || (*mb_char2len)(endc) > 1)) {
+ if ((*mb_char2len)(startc) > 1
+ || (*mb_char2len)(endc) > 1) {
// Limit to a range of 256 chars
if (endc > startc + 256) {
EMSG_RET_NULL(_(e_large_class));
@@ -2502,9 +2497,9 @@ do_multibyte:
&& !one_exactly
&& !is_Magic(c))); ++len) {
c = no_Magic(c);
- if (has_mbyte) {
+ {
regmbc(c);
- if (enc_utf8) {
+ {
int l;
/* Need to get composing character too. */
@@ -2516,8 +2511,7 @@ do_multibyte:
skipchr();
}
}
- } else
- regc(c);
+ }
c = getchr();
}
ungetchr();
@@ -4248,15 +4242,13 @@ static bool regmatch(
opnd = OPERAND(scan);
// Inline the first byte, for speed.
if (*opnd != *rex.input
- && (!rex.reg_ic
- || (!enc_utf8
- && mb_tolower(*opnd) != mb_tolower(*rex.input)))) {
+ && (!rex.reg_ic)) {
status = RA_NOMATCH;
} else if (*opnd == NUL) {
// match empty string always works; happens when "~" is
// empty.
} else {
- if (opnd[1] == NUL && !(enc_utf8 && rex.reg_ic)) {
+ if (opnd[1] == NUL && !rex.reg_ic) {
len = 1; // matched a single byte above
} else {
// Need to match first byte again for multi-byte.
@@ -4267,7 +4259,7 @@ static bool regmatch(
}
// Check for following composing character, unless %C
// follows (skips over all composing chars).
- if (status != RA_NOMATCH && enc_utf8
+ if (status != RA_NOMATCH
&& UTF_COMPOSINGLIKE(rex.input, rex.input + len)
&& !rex.reg_icombine
&& OP(next) != RE_COMPOSING) {
@@ -4336,7 +4328,7 @@ static bool regmatch(
break;
case RE_COMPOSING:
- if (enc_utf8) {
+ {
// Skip composing characters.
while (utf_iscomposing(utf_ptr2char(rex.input))) {
MB_CPTR_ADV(rex.input);
@@ -5366,9 +5358,10 @@ do_class:
if (got_int) {
break;
}
- } else if (has_mbyte && (l = (*mb_ptr2len)(scan)) > 1) {
- if (testval != 0)
+ } else if ((l = (*mb_ptr2len)(scan)) > 1) {
+ if (testval != 0) {
break;
+ }
scan += l;
} else if ((class_tab[*scan] & mask) == testval) {
scan++;
@@ -5481,7 +5474,7 @@ do_class:
/* Safety check (just in case 'encoding' was changed since
* compiling the program). */
if ((len = (*mb_ptr2len)(opnd)) > 1) {
- if (rex.reg_ic && enc_utf8) {
+ if (rex.reg_ic) {
cf = utf_fold(utf_ptr2char(opnd));
}
while (count < maxcount && (*mb_ptr2len)(scan) >= len) {
@@ -5490,7 +5483,7 @@ do_class:
break;
}
}
- if (i < len && (!rex.reg_ic || !enc_utf8
+ if (i < len && (!rex.reg_ic
|| utf_fold(utf_ptr2char(scan)) != cf)) {
break;
}
@@ -6383,7 +6376,7 @@ static int cstrncmp(char_u *s1, char_u *s2, int *n)
}
// if it failed and it's utf8 and we want to combineignore:
- if (result != 0 && enc_utf8 && rex.reg_icombine) {
+ if (result != 0 && rex.reg_icombine) {
char_u *str1, *str2;
int c1, c2, c11, c12;
int junk;
@@ -6501,10 +6494,10 @@ char_u *regtilde(char_u *source, int magic)
STRMOVE(p, p + 2); /* remove '\~' */
--p;
} else {
- if (*p == '\\' && p[1]) /* skip escaped characters */
- ++p;
- if (has_mbyte)
- p += (*mb_ptr2len)(p) - 1;
+ if (*p == '\\' && p[1]) { // skip escaped characters
+ p++;
+ }
+ p += (*mb_ptr2len)(p) - 1;
}
}
@@ -6940,7 +6933,7 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest,
else /* just copy */
cc = c;
- if (has_mbyte) {
+ {
int l;
// Copy composing characters separately, one
@@ -6953,8 +6946,6 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest,
utf_char2bytes(cc, dst);
}
dst += utf_char2len(cc) - 1;
- } else if (copy) {
- *dst = cc;
}
dst++;
}
diff --git a/src/nvim/regexp_nfa.c b/src/nvim/regexp_nfa.c
index 7cd1ae93d2..7dfd16fb4f 100644
--- a/src/nvim/regexp_nfa.c
+++ b/src/nvim/regexp_nfa.c
@@ -704,8 +704,7 @@ static void nfa_emit_equi_class(int c)
#define EMIT2(c) EMIT(c); EMIT(NFA_CONCAT);
#define EMITMBC(c) EMIT(c); EMIT(NFA_CONCAT);
- if (enc_utf8 || STRCMP(p_enc, "latin1") == 0
- || STRCMP(p_enc, "iso-8859-15") == 0) {
+ {
#define A_grave 0xc0
#define A_acute 0xc1
#define A_circumflex 0xc2
@@ -1246,7 +1245,7 @@ static int nfa_regatom(void)
}
// When '.' is followed by a composing char ignore the dot, so that
// the composing char is matched here.
- if (enc_utf8 && c == Magic('.') && utf_iscomposing(peekchr())) {
+ if (c == Magic('.') && utf_iscomposing(peekchr())) {
old_regparse = regparse;
c = getchr();
goto nfa_do_multibyte;
@@ -1737,11 +1736,10 @@ collection:
EMIT(endc);
EMIT(NFA_RANGE);
EMIT(NFA_CONCAT);
- } else if (has_mbyte && ((*mb_char2len)(startc) > 1
- || (*mb_char2len)(endc) > 1)) {
- /* Emit the characters in the range.
- * "startc" was already emitted, so skip it.
- * */
+ } else if ((*mb_char2len)(startc) > 1
+ || (*mb_char2len)(endc) > 1) {
+ // Emit the characters in the range.
+ // "startc" was already emitted, so skip it.
for (c = startc + 1; c <= endc; c++) {
EMIT(c);
EMIT(NFA_CONCAT);
@@ -1819,9 +1817,8 @@ collection:
nfa_do_multibyte:
// plen is length of current char with composing chars
- if (enc_utf8 && ((*mb_char2len)(c)
- != (plen = utfc_ptr2len(old_regparse))
- || utf_iscomposing(c))) {
+ if ((*mb_char2len)(c) != (plen = utfc_ptr2len(old_regparse))
+ || utf_iscomposing(c)) {
int i = 0;
/* A base character plus composing characters, or just one
@@ -4995,7 +4992,7 @@ static long find_match_text(colnr_T startcol, int regstart, char_u *match_text)
}
if (match
// check that no composing char follows
- && !(enc_utf8 && utf_iscomposing(PTR2CHAR(s2)))) {
+ && !utf_iscomposing(PTR2CHAR(s2))) {
cleanup_subexpr();
if (REG_MULTI) {
rex.reg_startpos[0].lnum = rex.lnum;
@@ -5248,7 +5245,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
{
// If the match ends before a composing characters and
// rex.reg_icombine is not set, that is not really a match.
- if (enc_utf8 && !rex.reg_icombine && utf_iscomposing(curc)) {
+ if (!rex.reg_icombine && utf_iscomposing(curc)) {
break;
}
nfa_match = true;
@@ -5747,7 +5744,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
case NFA_ANY_COMPOSING:
// On a composing character skip over it. Otherwise do
// nothing. Always matches.
- if (enc_utf8 && utf_iscomposing(curc)) {
+ if (utf_iscomposing(curc)) {
add_off = clen;
} else {
add_here = true;
@@ -6019,7 +6016,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
// Bail out quickly when there can't be a match, avoid the overhead of
// win_linetabsize() on long lines.
- if (op != 1 && col > t->state->val * (has_mbyte ? MB_MAXBYTES : 1)) {
+ if (op != 1 && col > t->state->val * MB_MAXBYTES) {
break;
}
@@ -6132,7 +6129,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start,
// If rex.reg_icombine is not set only skip over the character
// itself. When it is set skip over composing characters.
- if (result && enc_utf8 && !rex.reg_icombine) {
+ if (result && !rex.reg_icombine) {
clen = utf_ptr2len(rex.input);
}
diff --git a/src/nvim/search.c b/src/nvim/search.c
index b25333c9fa..f979889540 100644
--- a/src/nvim/search.c
+++ b/src/nvim/search.c
@@ -218,14 +218,11 @@ char_u *reverse_text(char_u *s) FUNC_ATTR_NONNULL_RET
size_t len = STRLEN(s);
char_u *rev = xmalloc(len + 1);
size_t rev_i = len;
- for (size_t s_i = 0; s_i < len; ++s_i) {
- if (has_mbyte) {
- int mb_len = (*mb_ptr2len)(s + s_i);
- rev_i -= mb_len;
- memmove(rev + rev_i, s + s_i, mb_len);
- s_i += mb_len - 1;
- } else
- rev[--rev_i] = s[s_i];
+ for (size_t s_i = 0; s_i < len; s_i++) {
+ const int mb_len = utfc_ptr2len(s + s_i);
+ rev_i -= mb_len;
+ memmove(rev + rev_i, s + s_i, mb_len);
+ s_i += mb_len - 1;
}
rev[len] = NUL;
@@ -594,8 +591,8 @@ int searchit(
// is zero.
if (pos->col == MAXCOL) {
start_char_len = 0;
- } else if (has_mbyte
- && pos->lnum >= 1 && pos->lnum <= buf->b_ml.ml_line_count
+ } else if (pos->lnum >= 1
+ && pos->lnum <= buf->b_ml.ml_line_count
&& pos->col < MAXCOL - 2) {
// Watch out for the "col" being MAXCOL - 2, used in a closed fold.
ptr = ml_get_buf(buf, pos->lnum, false);
@@ -1553,34 +1550,26 @@ int searchc(cmdarg_T *cap, int t_cmd)
len = (int)STRLEN(p);
while (count--) {
- if (has_mbyte) {
- for (;; ) {
- if (dir > 0) {
- col += (*mb_ptr2len)(p + col);
- if (col >= len)
- return FAIL;
- } else {
- if (col == 0)
- return FAIL;
- col -= utf_head_off(p, p + col - 1) + 1;
+ for (;; ) {
+ if (dir > 0) {
+ col += utfc_ptr2len(p + col);
+ if (col >= len) {
+ return FAIL;
}
- if (lastc_bytelen == 1) {
- if (p[col] == c && stop) {
- break;
- }
- } else if (STRNCMP(p + col, lastc_bytes, lastc_bytelen) == 0 && stop) {
- break;
+ } else {
+ if (col == 0) {
+ return FAIL;
}
- stop = true;
+ col -= utf_head_off(p, p + col - 1) + 1;
}
- } else {
- for (;; ) {
- if ((col += dir) < 0 || col >= len)
- return FAIL;
- if (p[col] == c && stop)
+ if (lastc_bytelen == 1) {
+ if (p[col] == c && stop) {
break;
- stop = TRUE;
+ }
+ } else if (STRNCMP(p + col, lastc_bytes, lastc_bytelen) == 0 && stop) {
+ break;
}
+ stop = true;
}
}
@@ -1964,10 +1953,7 @@ pos_T *findmatchlimit(oparg_T *oap, int initc, int flags, int64_t maxtravel)
if (lisp) /* find comment pos in new line */
comment_col = check_linecomment(linep);
} else {
- if (has_mbyte)
- pos.col += (*mb_ptr2len)(linep + pos.col);
- else
- ++pos.col;
+ pos.col += utfc_ptr2len(linep + pos.col);
}
}
diff --git a/src/nvim/spell.c b/src/nvim/spell.c
index 636c71657d..797fe41320 100644
--- a/src/nvim/spell.c
+++ b/src/nvim/spell.c
@@ -513,10 +513,7 @@ size_t spell_check(
}
}
- if (has_mbyte) {
- return (size_t)(*mb_ptr2len)(ptr);
- }
- return 1;
+ return (size_t)(utfc_ptr2len(ptr));
} else if (mi.mi_end == ptr) {
// Always include at least one character. Required for when there
// is a mixup in "midword".
@@ -722,7 +719,7 @@ static void find_word(matchinf_T *mip, int mode)
// has been found we try compound flags.
bool prefix_found = false;
- if (mode != FIND_KEEPWORD && has_mbyte) {
+ if (mode != FIND_KEEPWORD) {
// Compute byte length in original word, length may change
// when folding case. This can be slow, take a shortcut when the
// case-folded word is equal to the keep-case word.
@@ -796,11 +793,11 @@ static void find_word(matchinf_T *mip, int mode)
continue;
// For multi-byte chars check character length against
// COMPOUNDMIN.
- if (has_mbyte
- && slang->sl_compminlen > 0
+ if (slang->sl_compminlen > 0
&& mb_charlen_len(mip->mi_word + mip->mi_compoff,
- wlen - mip->mi_compoff) < slang->sl_compminlen)
+ wlen - mip->mi_compoff) < slang->sl_compminlen) {
continue;
+ }
// Limit the number of compound words to COMPOUNDWORDMAX if no
// maximum for syllables is specified.
@@ -833,8 +830,7 @@ static void find_word(matchinf_T *mip, int mode)
// Need to check the caps type of the appended compound
// word.
- if (has_mbyte && STRNCMP(ptr, mip->mi_word,
- mip->mi_compoff) != 0) {
+ if (STRNCMP(ptr, mip->mi_word, mip->mi_compoff) != 0) {
// case folding may have changed the length
p = mip->mi_word;
for (char_u *s = ptr; s < ptr + mip->mi_compoff; MB_PTR_ADV(s)) {
@@ -907,7 +903,7 @@ static void find_word(matchinf_T *mip, int mode)
// Find following word in case-folded tree.
mip->mi_compoff = endlen[endidxcnt];
- if (has_mbyte && mode == FIND_KEEPWORD) {
+ if (mode == FIND_KEEPWORD) {
// Compute byte length in case-folded word from "wlen":
// byte length in keep-case word. Length may change when
// folding case. This can be slow, take a shortcut when
@@ -1260,12 +1256,9 @@ static void find_prefix(matchinf_T *mip, int mode)
// Skip over the previously found word(s).
mip->mi_prefixlen += mip->mi_compoff;
- if (has_mbyte) {
- // Case-folded length may differ from original length.
- mip->mi_cprefixlen = nofold_len(mip->mi_fword,
- mip->mi_prefixlen, mip->mi_word);
- } else
- mip->mi_cprefixlen = mip->mi_prefixlen;
+ // Case-folded length may differ from original length.
+ mip->mi_cprefixlen = nofold_len(mip->mi_fword, mip->mi_prefixlen,
+ mip->mi_word);
find_word(mip, FIND_PREFIX);
@@ -2272,35 +2265,30 @@ static void clear_midword(win_T *wp)
// Use the "sl_midword" field of language "lp" for buffer "buf".
// They add up to any currently used midword characters.
static void use_midword(slang_T *lp, win_T *wp)
+ FUNC_ATTR_NONNULL_ALL
{
- char_u *p;
-
- if (lp->sl_midword == NULL) // there aren't any
+ if (lp->sl_midword == NULL) { // there aren't any
return;
+ }
- for (p = lp->sl_midword; *p != NUL; )
- if (has_mbyte) {
- int c, l, n;
- char_u *bp;
-
- c = utf_ptr2char(p);
- l = (*mb_ptr2len)(p);
- if (c < 256 && l <= 2)
- wp->w_s->b_spell_ismw[c] = true;
- else if (wp->w_s->b_spell_ismw_mb == NULL)
- // First multi-byte char in "b_spell_ismw_mb".
- wp->w_s->b_spell_ismw_mb = vim_strnsave(p, l);
- else {
- // Append multi-byte chars to "b_spell_ismw_mb".
- n = (int)STRLEN(wp->w_s->b_spell_ismw_mb);
- bp = vim_strnsave(wp->w_s->b_spell_ismw_mb, n + l);
- xfree(wp->w_s->b_spell_ismw_mb);
- wp->w_s->b_spell_ismw_mb = bp;
- STRLCPY(bp + n, p, l + 1);
- }
- p += l;
- } else
- wp->w_s->b_spell_ismw[*p++] = true;
+ for (char_u *p = lp->sl_midword; *p != NUL; ) {
+ const int c = utf_ptr2char(p);
+ const int l = utfc_ptr2len(p);
+ if (c < 256 && l <= 2) {
+ wp->w_s->b_spell_ismw[c] = true;
+ } else if (wp->w_s->b_spell_ismw_mb == NULL) {
+ // First multi-byte char in "b_spell_ismw_mb".
+ wp->w_s->b_spell_ismw_mb = vim_strnsave(p, l);
+ } else {
+ // Append multi-byte chars to "b_spell_ismw_mb".
+ const int n = (int)STRLEN(wp->w_s->b_spell_ismw_mb);
+ char_u *bp = vim_strnsave(wp->w_s->b_spell_ismw_mb, n + l);
+ xfree(wp->w_s->b_spell_ismw_mb);
+ wp->w_s->b_spell_ismw_mb = bp;
+ STRLCPY(bp + n, p, l + 1);
+ }
+ p += l;
+ }
}
// Find the region "region[2]" in "rp" (points to "sl_regions").
@@ -2333,7 +2321,6 @@ int captype(char_u *word, char_u *end)
FUNC_ATTR_NONNULL_ARG(1)
{
char_u *p;
- int c;
int firstcap;
bool allcap;
bool past_second = false; // past second word char
@@ -2344,11 +2331,7 @@ int captype(char_u *word, char_u *end)
return 0; // only non-word characters, illegal word
}
}
- if (has_mbyte) {
- c = mb_ptr2char_adv((const char_u **)&p);
- } else {
- c = *p++;
- }
+ int c = mb_ptr2char_adv((const char_u **)&p);
firstcap = allcap = SPELL_ISUPPER(c);
// Need to check all letters to find a word with mixed upper/lower.
@@ -2673,34 +2656,23 @@ static bool spell_iswordp_w(const int *p, const win_T *wp)
// Returns FAIL when something wrong.
int spell_casefold(char_u *str, int len, char_u *buf, int buflen)
{
- int i;
-
if (len >= buflen) {
buf[0] = NUL;
return FAIL; // result will not fit
}
- if (has_mbyte) {
- int outi = 0;
- char_u *p;
- int c;
+ int outi = 0;
- // Fold one character at a time.
- for (p = str; p < str + len; ) {
- if (outi + MB_MAXBYTES > buflen) {
- buf[outi] = NUL;
- return FAIL;
- }
- c = mb_cptr2char_adv((const char_u **)&p);
- outi += utf_char2bytes(SPELL_TOFOLD(c), buf + outi);
+ // Fold one character at a time.
+ for (char_u *p = str; p < str + len; ) {
+ if (outi + MB_MAXBYTES > buflen) {
+ buf[outi] = NUL;
+ return FAIL;
}
- buf[outi] = NUL;
- } else {
- // Be quick for non-multibyte encodings.
- for (i = 0; i < len; ++i)
- buf[i] = spelltab.st_fold[str[i]];
- buf[i] = NUL;
+ const int c = mb_cptr2char_adv((const char_u **)&p);
+ outi += utf_char2bytes(SPELL_TOFOLD(c), buf + outi);
}
+ buf[outi] = NUL;
return OK;
}
@@ -3428,22 +3400,14 @@ static void spell_find_cleanup(suginfo_T *su)
/// @param[in] upper True to upper case, otherwise lower case
void onecap_copy(char_u *word, char_u *wcopy, bool upper)
{
- char_u *p;
- int c;
- int l;
-
- p = word;
- if (has_mbyte) {
- c = mb_cptr2char_adv((const char_u **)&p);
- } else {
- c = *p++;
- }
+ char_u *p = word;
+ int c = mb_cptr2char_adv((const char_u **)&p);
if (upper) {
c = SPELL_TOUPPER(c);
} else {
c = SPELL_TOFOLD(c);
}
- l = utf_char2bytes(c, wcopy);
+ int l = utf_char2bytes(c, wcopy);
STRLCPY(wcopy + l, p, MAXWLEN - l);
}
@@ -3451,17 +3415,9 @@ void onecap_copy(char_u *word, char_u *wcopy, bool upper)
// "wcopy[MAXWLEN]". The result is NUL terminated.
static void allcap_copy(char_u *word, char_u *wcopy)
{
- char_u *s;
- char_u *d;
- int c;
-
- d = wcopy;
- for (s = word; *s != NUL; ) {
- if (has_mbyte) {
- c = mb_cptr2char_adv((const char_u **)&s);
- } else {
- c = *s++;
- }
+ char_u *d = wcopy;
+ for (char_u *s = word; *s != NUL; ) {
+ int c = mb_cptr2char_adv((const char_u **)&s);
if (c == 0xdf) {
c = 'S';
@@ -3730,10 +3686,7 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, bool so
if (byts[arridx] == 0 || n == (int)STATE_NOPREFIX) {
// Set su->su_badflags to the caps type at this position.
// Use the caps type until here for the prefix itself.
- if (has_mbyte)
- n = nofold_len(fword, sp->ts_fidx, su->su_badptr);
- else
- n = sp->ts_fidx;
+ n = nofold_len(fword, sp->ts_fidx, su->su_badptr);
flags = badword_captype(su->su_badptr, su->su_badptr + n);
su->su_badflags = badword_captype(su->su_badptr + n,
su->su_badptr + su->su_badlen);
@@ -3851,15 +3804,16 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, bool so
// flag).
if (((unsigned)flags >> 24) == 0
|| sp->ts_twordlen - sp->ts_splitoff
- < slang->sl_compminlen)
+ < slang->sl_compminlen) {
break;
+ }
// For multi-byte chars check character length against
// COMPOUNDMIN.
- if (has_mbyte
- && slang->sl_compminlen > 0
+ if (slang->sl_compminlen > 0
&& mb_charlen(tword + sp->ts_splitoff)
- < slang->sl_compminlen)
+ < slang->sl_compminlen) {
break;
+ }
compflags[sp->ts_complen] = ((unsigned)flags >> 24);
compflags[sp->ts_complen + 1] = NUL;
@@ -4014,7 +3968,7 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, bool so
// Try word split and/or compounding.
if ((sp->ts_fidx >= sp->ts_fidxtry || fword_ends)
// Don't split in the middle of a character
- && (!has_mbyte || sp->ts_tcharlen == 0)
+ && (sp->ts_tcharlen == 0)
) {
bool try_compound;
int try_split;
@@ -4046,8 +4000,7 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, bool so
&& ((unsigned)flags >> 24) != 0
&& sp->ts_twordlen - sp->ts_splitoff
>= slang->sl_compminlen
- && (!has_mbyte
- || slang->sl_compminlen == 0
+ && (slang->sl_compminlen == 0
|| mb_charlen(tword + sp->ts_splitoff)
>= slang->sl_compminlen)
&& (slang->sl_compsylmax < MAXWLEN
@@ -4166,10 +4119,7 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, bool so
// set su->su_badflags to the caps type at this
// position
- if (has_mbyte)
- n = nofold_len(fword, sp->ts_fidx, su->su_badptr);
- else
- n = sp->ts_fidx;
+ n = nofold_len(fword, sp->ts_fidx, su->su_badptr);
su->su_badflags = badword_captype(su->su_badptr + n,
su->su_badptr + su->su_badlen);
@@ -4266,84 +4216,74 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, bool so
++sp->ts_fidx;
tword[sp->ts_twordlen++] = c;
sp->ts_arridx = idxs[arridx];
- if (newscore == SCORE_SUBST)
+ if (newscore == SCORE_SUBST) {
sp->ts_isdiff = DIFF_YES;
- if (has_mbyte) {
- // Multi-byte characters are a bit complicated to
- // handle: They differ when any of the bytes differ
- // and then their length may also differ.
- if (sp->ts_tcharlen == 0) {
- // First byte.
- sp->ts_tcharidx = 0;
- sp->ts_tcharlen = MB_BYTE2LEN(c);
- sp->ts_fcharstart = sp->ts_fidx - 1;
- sp->ts_isdiff = (newscore != 0)
- ? DIFF_YES : DIFF_NONE;
- } else if (sp->ts_isdiff == DIFF_INSERT)
- // When inserting trail bytes don't advance in the
- // bad word.
- --sp->ts_fidx;
- if (++sp->ts_tcharidx == sp->ts_tcharlen) {
- // Last byte of character.
- if (sp->ts_isdiff == DIFF_YES) {
- // Correct ts_fidx for the byte length of the
- // character (we didn't check that before).
- sp->ts_fidx = sp->ts_fcharstart
- + utfc_ptr2len(fword + sp->ts_fcharstart);
-
- // For changing a composing character adjust
- // the score from SCORE_SUBST to
- // SCORE_SUBCOMP.
- if (utf_iscomposing(utf_ptr2char(tword + sp->ts_twordlen
- - sp->ts_tcharlen))
- && utf_iscomposing(utf_ptr2char(fword
- + sp->ts_fcharstart))) {
- sp->ts_score -= SCORE_SUBST - SCORE_SUBCOMP;
- } else if (
- !soundfold
- && slang->sl_has_map
- && similar_chars(
- slang,
- utf_ptr2char(tword + sp->ts_twordlen - sp->ts_tcharlen),
- utf_ptr2char(fword + sp->ts_fcharstart))) {
- // For a similar character adjust score from
- // SCORE_SUBST to SCORE_SIMILAR.
- sp->ts_score -= SCORE_SUBST - SCORE_SIMILAR;
- }
- } else if (sp->ts_isdiff == DIFF_INSERT
- && sp->ts_twordlen > sp->ts_tcharlen) {
- p = tword + sp->ts_twordlen - sp->ts_tcharlen;
- c = utf_ptr2char(p);
- if (utf_iscomposing(c)) {
- // Inserting a composing char doesn't
- // count that much.
- sp->ts_score -= SCORE_INS - SCORE_INSCOMP;
- } else {
- // If the previous character was the same,
- // thus doubling a character, give a bonus
- // to the score. Also for the soundfold
- // tree (might seem illogical but does
- // give better scores).
- MB_PTR_BACK(tword, p);
- if (c == utf_ptr2char(p)) {
- sp->ts_score -= SCORE_INS - SCORE_INSDUP;
- }
+ }
+ // Multi-byte characters are a bit complicated to
+ // handle: They differ when any of the bytes differ
+ // and then their length may also differ.
+ if (sp->ts_tcharlen == 0) {
+ // First byte.
+ sp->ts_tcharidx = 0;
+ sp->ts_tcharlen = MB_BYTE2LEN(c);
+ sp->ts_fcharstart = sp->ts_fidx - 1;
+ sp->ts_isdiff = (newscore != 0)
+ ? DIFF_YES : DIFF_NONE;
+ } else if (sp->ts_isdiff == DIFF_INSERT) {
+ // When inserting trail bytes don't advance in the
+ // bad word.
+ sp->ts_fidx--;
+ }
+ if (++sp->ts_tcharidx == sp->ts_tcharlen) {
+ // Last byte of character.
+ if (sp->ts_isdiff == DIFF_YES) {
+ // Correct ts_fidx for the byte length of the
+ // character (we didn't check that before).
+ sp->ts_fidx = sp->ts_fcharstart
+ + utfc_ptr2len(fword + sp->ts_fcharstart);
+
+ // For changing a composing character adjust
+ // the score from SCORE_SUBST to
+ // SCORE_SUBCOMP.
+ if (utf_iscomposing(utf_ptr2char(tword + sp->ts_twordlen
+ - sp->ts_tcharlen))
+ && utf_iscomposing(utf_ptr2char(fword
+ + sp->ts_fcharstart))) {
+ sp->ts_score -= SCORE_SUBST - SCORE_SUBCOMP;
+ } else if (
+ !soundfold
+ && slang->sl_has_map
+ && similar_chars(
+ slang,
+ utf_ptr2char(tword + sp->ts_twordlen - sp->ts_tcharlen),
+ utf_ptr2char(fword + sp->ts_fcharstart))) {
+ // For a similar character adjust score from
+ // SCORE_SUBST to SCORE_SIMILAR.
+ sp->ts_score -= SCORE_SUBST - SCORE_SIMILAR;
+ }
+ } else if (sp->ts_isdiff == DIFF_INSERT
+ && sp->ts_twordlen > sp->ts_tcharlen) {
+ p = tword + sp->ts_twordlen - sp->ts_tcharlen;
+ c = utf_ptr2char(p);
+ if (utf_iscomposing(c)) {
+ // Inserting a composing char doesn't
+ // count that much.
+ sp->ts_score -= SCORE_INS - SCORE_INSCOMP;
+ } else {
+ // If the previous character was the same,
+ // thus doubling a character, give a bonus
+ // to the score. Also for the soundfold
+ // tree (might seem illogical but does
+ // give better scores).
+ MB_PTR_BACK(tword, p);
+ if (c == utf_ptr2char(p)) {
+ sp->ts_score -= SCORE_INS - SCORE_INSDUP;
}
}
-
- // Starting a new char, reset the length.
- sp->ts_tcharlen = 0;
}
- } else {
- // If we found a similar char adjust the score.
- // We do this after calling go_deeper() because
- // it's slow.
- if (newscore != 0
- && !soundfold
- && slang->sl_has_map
- && similar_chars(slang,
- c, fword[sp->ts_fidx - 1]))
- sp->ts_score -= SCORE_SUBST - SCORE_SIMILAR;
+
+ // Starting a new char, reset the length.
+ sp->ts_tcharlen = 0;
}
}
}
@@ -4352,7 +4292,7 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, bool so
case STATE_DEL:
// When past the first byte of a multi-byte char don't try
// delete/insert/swap a character.
- if (has_mbyte && sp->ts_tcharlen > 0) {
+ if (sp->ts_tcharlen > 0) {
PROF_STORE(sp->ts_state)
sp->ts_state = STATE_FINAL;
break;
@@ -4461,18 +4401,15 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, bool so
sp = &stack[depth];
tword[sp->ts_twordlen++] = c;
sp->ts_arridx = idxs[n];
- if (has_mbyte) {
- fl = MB_BYTE2LEN(c);
- if (fl > 1) {
- // There are following bytes for the same character.
- // We must find all bytes before trying
- // delete/insert/swap/etc.
- sp->ts_tcharlen = fl;
- sp->ts_tcharidx = 1;
- sp->ts_isdiff = DIFF_INSERT;
- }
- } else
- fl = 1;
+ fl = MB_BYTE2LEN(c);
+ if (fl > 1) {
+ // There are following bytes for the same character.
+ // We must find all bytes before trying
+ // delete/insert/swap/etc.
+ sp->ts_tcharlen = fl;
+ sp->ts_tcharidx = 1;
+ sp->ts_isdiff = DIFF_INSERT;
+ }
if (fl == 1) {
// If the previous character was the same, thus doubling a
// character, give a bonus to the score. Also for
@@ -4914,12 +4851,8 @@ static void find_keepcap_word(slang_T *slang, char_u *fword, char_u *kword)
} else {
// round[depth] == 1: Try using the folded-case character.
// round[depth] == 2: Try using the upper-case character.
- if (has_mbyte) {
- flen = MB_CPTR2LEN(fword + fwordidx[depth]);
- ulen = MB_CPTR2LEN(uword + uwordidx[depth]);
- } else {
- ulen = flen = 1;
- }
+ flen = MB_CPTR2LEN(fword + fwordidx[depth]);
+ ulen = MB_CPTR2LEN(uword + uwordidx[depth]);
if (round[depth] == 1) {
p = fword + fwordidx[depth];
l = flen;
@@ -5872,57 +5805,43 @@ void spell_soundfold(slang_T *slang, char_u *inword, bool folded, char_u *res)
// SOFOTO lines.
static void spell_soundfold_sofo(slang_T *slang, char_u *inword, char_u *res)
{
- char_u *s;
int ri = 0;
- int c;
- if (has_mbyte) {
- int prevc = 0;
- int *ip;
+ int prevc = 0;
- // The sl_sal_first[] table contains the translation for chars up to
- // 255, sl_sal the rest.
- for (s = inword; *s != NUL; ) {
- c = mb_cptr2char_adv((const char_u **)&s);
- if (utf_class(c) == 0) {
- c = ' ';
- } else if (c < 256) {
- c = slang->sl_sal_first[c];
+ // The sl_sal_first[] table contains the translation for chars up to
+ // 255, sl_sal the rest.
+ for (char_u *s = inword; *s != NUL; ) {
+ int c = mb_cptr2char_adv((const char_u **)&s);
+ if (utf_class(c) == 0) {
+ c = ' ';
+ } else if (c < 256) {
+ c = slang->sl_sal_first[c];
+ } else {
+ int *ip = ((int **)slang->sl_sal.ga_data)[c & 0xff];
+ if (ip == NULL) { // empty list, can't match
+ c = NUL;
} else {
- ip = ((int **)slang->sl_sal.ga_data)[c & 0xff];
- if (ip == NULL) // empty list, can't match
- c = NUL;
- else
- for (;; ) { // find "c" in the list
- if (*ip == 0) { // not found
- c = NUL;
- break;
- }
- if (*ip == c) { // match!
- c = ip[1];
- break;
- }
- ip += 2;
+ for (;; ) { // find "c" in the list
+ if (*ip == 0) { // not found
+ c = NUL;
+ break;
}
- }
-
- if (c != NUL && c != prevc) {
- ri += utf_char2bytes(c, res + ri);
- if (ri + MB_MAXBYTES > MAXWLEN) {
- break;
+ if (*ip == c) { // match!
+ c = ip[1];
+ break;
+ }
+ ip += 2;
}
- prevc = c;
}
}
- } else {
- // The sl_sal_first[] table contains the translation.
- for (s = inword; (c = *s) != NUL; ++s) {
- if (ascii_iswhite(c))
- c = ' ';
- else
- c = slang->sl_sal_first[c];
- if (c != NUL && (ri == 0 || res[ri - 1] != c))
- res[ri++] = c;
+
+ if (c != NUL && c != prevc) {
+ ri += utf_char2bytes(c, res + ri);
+ if (ri + MB_MAXBYTES > MAXWLEN) {
+ break;
+ }
+ prevc = c;
}
}
@@ -6425,12 +6344,11 @@ static int spell_edit_score(slang_T *slang, char_u *badword, char_u *goodword)
int pbc, pgc;
int wbadword[MAXWLEN];
int wgoodword[MAXWLEN];
- const bool l_has_mbyte = has_mbyte;
// Lengths with NUL.
int badlen;
int goodlen;
- if (l_has_mbyte) {
+ {
// Get the characters from the multi-byte strings and put them in an
// int array for easy access.
badlen = 0;
@@ -6443,9 +6361,6 @@ static int spell_edit_score(slang_T *slang, char_u *badword, char_u *goodword)
wgoodword[goodlen++] = mb_cptr2char_adv(&p);
}
wgoodword[goodlen++] = 0;
- } else {
- badlen = (int)STRLEN(badword) + 1;
- goodlen = (int)STRLEN(goodword) + 1;
}
// We use "cnt" as an array: CNT(badword_idx, goodword_idx).
@@ -6458,17 +6373,12 @@ static int spell_edit_score(slang_T *slang, char_u *badword, char_u *goodword)
for (i = 1; i <= badlen; ++i) {
CNT(i, 0) = CNT(i - 1, 0) + SCORE_DEL;
- for (j = 1; j <= goodlen; ++j) {
- if (l_has_mbyte) {
- bc = wbadword[i - 1];
- gc = wgoodword[j - 1];
- } else {
- bc = badword[i - 1];
- gc = goodword[j - 1];
- }
- if (bc == gc)
+ for (j = 1; j <= goodlen; j++) {
+ bc = wbadword[i - 1];
+ gc = wgoodword[j - 1];
+ if (bc == gc) {
CNT(i, j) = CNT(i - 1, j - 1);
- else {
+ } else {
// Use a better score when there is only a case difference.
if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc))
CNT(i, j) = SCORE_ICASE + CNT(i - 1, j - 1);
@@ -6483,13 +6393,8 @@ static int spell_edit_score(slang_T *slang, char_u *badword, char_u *goodword)
}
if (i > 1 && j > 1) {
- if (l_has_mbyte) {
- pbc = wbadword[i - 2];
- pgc = wgoodword[j - 2];
- } else {
- pbc = badword[i - 2];
- pgc = goodword[j - 2];
- }
+ pbc = wbadword[i - 2];
+ pgc = wgoodword[j - 2];
if (bc == pgc && pbc == gc) {
t = SCORE_SWAP + CNT(i - 2, j - 2);
if (t < CNT(i, j))
@@ -6519,147 +6424,7 @@ static int spell_edit_score(slang_T *slang, char_u *badword, char_u *goodword)
// for multi-byte characters.
static int spell_edit_score_limit(slang_T *slang, char_u *badword, char_u *goodword, int limit)
{
- limitscore_T stack[10]; // allow for over 3 * 2 edits
- int stackidx;
- int bi, gi;
- int bi2, gi2;
- int bc, gc;
- int score;
- int score_off;
- int minscore;
- int round;
-
- // Multi-byte characters require a bit more work, use a different function
- // to avoid testing "has_mbyte" quite often.
- if (has_mbyte)
- return spell_edit_score_limit_w(slang, badword, goodword, limit);
-
- // The idea is to go from start to end over the words. So long as
- // characters are equal just continue, this always gives the lowest score.
- // When there is a difference try several alternatives. Each alternative
- // increases "score" for the edit distance. Some of the alternatives are
- // pushed unto a stack and tried later, some are tried right away. At the
- // end of the word the score for one alternative is known. The lowest
- // possible score is stored in "minscore".
- stackidx = 0;
- bi = 0;
- gi = 0;
- score = 0;
- minscore = limit + 1;
-
- for (;; ) {
- // Skip over an equal part, score remains the same.
- for (;; ) {
- bc = badword[bi];
- gc = goodword[gi];
- if (bc != gc) // stop at a char that's different
- break;
- if (bc == NUL) { // both words end
- if (score < minscore)
- minscore = score;
- goto pop; // do next alternative
- }
- ++bi;
- ++gi;
- }
-
- if (gc == NUL) { // goodword ends, delete badword chars
- do {
- if ((score += SCORE_DEL) >= minscore)
- goto pop; // do next alternative
- } while (badword[++bi] != NUL);
- minscore = score;
- } else if (bc == NUL) { // badword ends, insert badword chars
- do {
- if ((score += SCORE_INS) >= minscore)
- goto pop; // do next alternative
- } while (goodword[++gi] != NUL);
- minscore = score;
- } else { // both words continue
- // If not close to the limit, perform a change. Only try changes
- // that may lead to a lower score than "minscore".
- // round 0: try deleting a char from badword
- // round 1: try inserting a char in badword
- for (round = 0; round <= 1; ++round) {
- score_off = score + (round == 0 ? SCORE_DEL : SCORE_INS);
- if (score_off < minscore) {
- if (score_off + SCORE_EDIT_MIN >= minscore) {
- // Near the limit, rest of the words must match. We
- // can check that right now, no need to push an item
- // onto the stack.
- bi2 = bi + 1 - round;
- gi2 = gi + round;
- while (goodword[gi2] == badword[bi2]) {
- if (goodword[gi2] == NUL) {
- minscore = score_off;
- break;
- }
- ++bi2;
- ++gi2;
- }
- } else {
- // try deleting/inserting a character later
- stack[stackidx].badi = bi + 1 - round;
- stack[stackidx].goodi = gi + round;
- stack[stackidx].score = score_off;
- ++stackidx;
- }
- }
- }
-
- if (score + SCORE_SWAP < minscore) {
- // If swapping two characters makes a match then the
- // substitution is more expensive, thus there is no need to
- // try both.
- if (gc == badword[bi + 1] && bc == goodword[gi + 1]) {
- // Swap two characters, that is: skip them.
- gi += 2;
- bi += 2;
- score += SCORE_SWAP;
- continue;
- }
- }
-
- // Substitute one character for another which is the same
- // thing as deleting a character from both goodword and badword.
- // Use a better score when there is only a case difference.
- if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc))
- score += SCORE_ICASE;
- else {
- // For a similar character use SCORE_SIMILAR.
- if (slang != NULL
- && slang->sl_has_map
- && similar_chars(slang, gc, bc))
- score += SCORE_SIMILAR;
- else
- score += SCORE_SUBST;
- }
-
- if (score < minscore) {
- // Do the substitution.
- ++gi;
- ++bi;
- continue;
- }
- }
-pop:
- // Get here to try the next alternative, pop it from the stack.
- if (stackidx == 0) // stack is empty, finished
- break;
-
- // pop an item from the stack
- --stackidx;
- gi = stack[stackidx].goodi;
- bi = stack[stackidx].badi;
- score = stack[stackidx].score;
- }
-
- // When the score goes over "limit" it may actually be much higher.
- // Return a very large number to avoid going below the limit when giving a
- // bonus.
- if (minscore > limit)
- return SCORE_MAXMAX;
- return minscore;
+ return spell_edit_score_limit_w(slang, badword, goodword, limit);
}
// Multi-byte version of spell_edit_score_limit().
diff --git a/src/nvim/spellfile.c b/src/nvim/spellfile.c
index b415a4635b..90af010164 100644
--- a/src/nvim/spellfile.c
+++ b/src/nvim/spellfile.c
@@ -1221,18 +1221,18 @@ static int read_sal_section(FILE *fd, slang_T *slang)
return ccnt;
}
- if (has_mbyte) {
- // convert the multi-byte strings to wide char strings
- smp->sm_lead_w = mb_str2wide(smp->sm_lead);
- smp->sm_leadlen = mb_charlen(smp->sm_lead);
- if (smp->sm_oneof == NULL)
- smp->sm_oneof_w = NULL;
- else
- smp->sm_oneof_w = mb_str2wide(smp->sm_oneof);
- if (smp->sm_to == NULL)
- smp->sm_to_w = NULL;
- else
- smp->sm_to_w = mb_str2wide(smp->sm_to);
+ // convert the multi-byte strings to wide char strings
+ smp->sm_lead_w = mb_str2wide(smp->sm_lead);
+ smp->sm_leadlen = mb_charlen(smp->sm_lead);
+ if (smp->sm_oneof == NULL) {
+ smp->sm_oneof_w = NULL;
+ } else {
+ smp->sm_oneof_w = mb_str2wide(smp->sm_oneof);
+ }
+ if (smp->sm_to == NULL) {
+ smp->sm_to_w = NULL;
+ } else {
+ smp->sm_to_w = mb_str2wide(smp->sm_to);
}
}
@@ -1488,72 +1488,61 @@ static int read_compound(FILE *fd, slang_T *slang, int len)
// Returns SP_*ERROR flags when there is something wrong.
static int set_sofo(slang_T *lp, char_u *from, char_u *to)
{
- int i;
-
- garray_T *gap;
char_u *s;
char_u *p;
- int c;
- int *inp;
-
- if (has_mbyte) {
- // Use "sl_sal" as an array with 256 pointers to a list of wide
- // characters. The index is the low byte of the character.
- // The list contains from-to pairs with a terminating NUL.
- // sl_sal_first[] is used for latin1 "from" characters.
- gap = &lp->sl_sal;
- ga_init(gap, sizeof(int *), 1);
- ga_grow(gap, 256);
- memset(gap->ga_data, 0, sizeof(int *) * 256);
- gap->ga_len = 256;
-
- // First count the number of items for each list. Temporarily use
- // sl_sal_first[] for this.
- for (p = from, s = to; *p != NUL && *s != NUL; ) {
- c = mb_cptr2char_adv((const char_u **)&p);
- MB_CPTR_ADV(s);
- if (c >= 256) {
- lp->sl_sal_first[c & 0xff]++;
- }
- }
- if (*p != NUL || *s != NUL) // lengths differ
- return SP_FORMERROR;
- // Allocate the lists.
- for (i = 0; i < 256; ++i)
- if (lp->sl_sal_first[i] > 0) {
- p = xmalloc(sizeof(int) * (lp->sl_sal_first[i] * 2 + 1));
- ((int **)gap->ga_data)[i] = (int *)p;
- *(int *)p = 0;
- }
+ // Use "sl_sal" as an array with 256 pointers to a list of wide
+ // characters. The index is the low byte of the character.
+ // The list contains from-to pairs with a terminating NUL.
+ // sl_sal_first[] is used for latin1 "from" characters.
+ garray_T *gap = &lp->sl_sal;
+ ga_init(gap, sizeof(int *), 1);
+ ga_grow(gap, 256);
+ memset(gap->ga_data, 0, sizeof(int *) * 256);
+ gap->ga_len = 256;
+
+ // First count the number of items for each list. Temporarily use
+ // sl_sal_first[] for this.
+ for (p = from, s = to; *p != NUL && *s != NUL; ) {
+ const int c = mb_cptr2char_adv((const char_u **)&p);
+ MB_CPTR_ADV(s);
+ if (c >= 256) {
+ lp->sl_sal_first[c & 0xff]++;
+ }
+ }
+ if (*p != NUL || *s != NUL) { // lengths differ
+ return SP_FORMERROR;
+ }
- // Put the characters up to 255 in sl_sal_first[] the rest in a sl_sal
- // list.
- memset(lp->sl_sal_first, 0, sizeof(salfirst_T) * 256);
- for (p = from, s = to; *p != NUL && *s != NUL; ) {
- c = mb_cptr2char_adv((const char_u **)&p);
- i = mb_cptr2char_adv((const char_u **)&s);
- if (c >= 256) {
- // Append the from-to chars at the end of the list with
- // the low byte.
- inp = ((int **)gap->ga_data)[c & 0xff];
- while (*inp != 0)
- ++inp;
- *inp++ = c; // from char
- *inp++ = i; // to char
- *inp++ = NUL; // NUL at the end
- } else
- // mapping byte to char is done in sl_sal_first[]
- lp->sl_sal_first[c] = i;
+ // Allocate the lists.
+ for (int i = 0; i < 256; i++) {
+ if (lp->sl_sal_first[i] > 0) {
+ p = xmalloc(sizeof(int) * (lp->sl_sal_first[i] * 2 + 1));
+ ((int **)gap->ga_data)[i] = (int *)p;
+ *(int *)p = 0;
}
- } else {
- // mapping bytes to bytes is done in sl_sal_first[]
- if (STRLEN(from) != STRLEN(to))
- return SP_FORMERROR;
+ }
- for (i = 0; to[i] != NUL; ++i)
- lp->sl_sal_first[from[i]] = to[i];
- lp->sl_sal.ga_len = 1; // indicates we have soundfolding
+ // Put the characters up to 255 in sl_sal_first[] the rest in a sl_sal
+ // list.
+ memset(lp->sl_sal_first, 0, sizeof(salfirst_T) * 256);
+ for (p = from, s = to; *p != NUL && *s != NUL; ) {
+ const int c = mb_cptr2char_adv((const char_u **)&p);
+ const int i = mb_cptr2char_adv((const char_u **)&s);
+ if (c >= 256) {
+ // Append the from-to chars at the end of the list with
+ // the low byte.
+ int *inp = ((int **)gap->ga_data)[c & 0xff];
+ while (*inp != 0) {
+ inp++;
+ }
+ *inp++ = c; // from char
+ *inp++ = i; // to char
+ *inp++ = NUL; // NUL at the end
+ } else {
+ // mapping byte to char is done in sl_sal_first[]
+ lp->sl_sal_first[c] = i;
+ }
}
return 0;
@@ -1572,40 +1561,35 @@ static void set_sal_first(slang_T *lp)
sfirst[i] = -1;
}
smp = (salitem_T *)gap->ga_data;
- for (int i = 0; i < gap->ga_len; ++i) {
- if (has_mbyte)
- // Use the lowest byte of the first character. For latin1 it's
- // the character, for other encodings it should differ for most
- // characters.
- c = *smp[i].sm_lead_w & 0xff;
- else
- c = *smp[i].sm_lead;
+ for (int i = 0; i < gap->ga_len; i++) {
+ // Use the lowest byte of the first character. For latin1 it's
+ // the character, for other encodings it should differ for most
+ // characters.
+ c = *smp[i].sm_lead_w & 0xff;
if (sfirst[c] == -1) {
sfirst[c] = i;
- if (has_mbyte) {
- int n;
-
- // Make sure all entries with this byte are following each
- // other. Move the ones that are in the wrong position. Do
- // keep the same ordering!
- while (i + 1 < gap->ga_len
- && (*smp[i + 1].sm_lead_w & 0xff) == c)
- // Skip over entry with same index byte.
- ++i;
-
- for (n = 1; i + n < gap->ga_len; ++n)
- if ((*smp[i + n].sm_lead_w & 0xff) == c) {
- salitem_T tsal;
-
- // Move entry with same index byte after the entries
- // we already found.
- ++i;
- --n;
- tsal = smp[i + n];
- memmove(smp + i + 1, smp + i,
- sizeof(salitem_T) * n);
- smp[i] = tsal;
- }
+
+ // Make sure all entries with this byte are following each
+ // other. Move the ones that are in the wrong position. Do
+ // keep the same ordering!
+ while (i + 1 < gap->ga_len
+ && (*smp[i + 1].sm_lead_w & 0xff) == c) {
+ // Skip over entry with same index byte.
+ i++;
+ }
+
+ for (int n = 1; i + n < gap->ga_len; n++) {
+ if ((*smp[i + n].sm_lead_w & 0xff) == c) {
+ salitem_T tsal;
+
+ // Move entry with same index byte after the entries
+ // we already found.
+ i++;
+ n--;
+ tsal = smp[i + n];
+ memmove(smp + i + 1, smp + i, sizeof(salitem_T) * n);
+ smp[i] = tsal;
+ }
}
}
}
@@ -2454,12 +2438,8 @@ static afffile_T *spell_read_aff(spellinfo_T *spin, char_u *fname)
// upper-case letter.
if (aff_entry->ae_cond != NULL) {
char_u buf[MAXLINELEN];
- if (has_mbyte) {
- onecap_copy(items[4], buf, true);
- aff_entry->ae_cond = getroom_save(
- spin, buf);
- } else
- *aff_entry->ae_cond = c_up;
+ onecap_copy(items[4], buf, true);
+ aff_entry->ae_cond = getroom_save(spin, buf);
if (aff_entry->ae_cond != NULL) {
sprintf((char *)buf, "^%s",
aff_entry->ae_cond);
@@ -3373,13 +3353,9 @@ store_aff_word (
p = word;
if (ae->ae_chop != NULL) {
// Skip chop string.
- if (has_mbyte) {
- i = mb_charlen(ae->ae_chop);
- for (; i > 0; i--) {
- MB_PTR_ADV(p);
- }
- } else {
- p += STRLEN(ae->ae_chop);
+ i = mb_charlen(ae->ae_chop);
+ for (; i > 0; i--) {
+ MB_PTR_ADV(p);
}
}
STRCAT(newword, p);
diff --git a/src/nvim/strings.c b/src/nvim/strings.c
index 2f5491fda5..81a1a68a94 100644
--- a/src/nvim/strings.c
+++ b/src/nvim/strings.c
@@ -94,8 +94,8 @@ char_u *vim_strsave_escaped_ext(const char_u *string, const char_u *esc_chars,
*/
size_t length = 1; // count the trailing NUL
for (const char_u *p = string; *p; p++) {
- size_t l;
- if (has_mbyte && (l = (size_t)(*mb_ptr2len)(p)) > 1) {
+ const size_t l = (size_t)(utfc_ptr2len(p));
+ if (l > 1) {
length += l; // count a multibyte char
p += l - 1;
continue;
@@ -108,8 +108,8 @@ char_u *vim_strsave_escaped_ext(const char_u *string, const char_u *esc_chars,
char_u *escaped_string = xmalloc(length);
char_u *p2 = escaped_string;
for (const char_u *p = string; *p; p++) {
- size_t l;
- if (has_mbyte && (l = (size_t)(*mb_ptr2len)(p)) > 1) {
+ const size_t l = (size_t)(utfc_ptr2len(p));
+ if (l > 1) {
memcpy(p2, p, l);
p2 += l;
p += l - 1; /* skip multibyte char */
@@ -349,7 +349,7 @@ char *strcase_save(const char *const orig, bool upper)
// thus it's OK to do another malloc()/free().
int newl = utf_char2len(uc);
if (newl != l) {
- // TODO(philix): use xrealloc() in strup_save()
+ // TODO(philix): use xrealloc() in strcase_save()
char *s = xmalloc(STRLEN(res) + (size_t)(1 + newl - l));
memcpy(s, res, (size_t)(p - res));
STRCPY(s + (p - res) + newl, p + l);
diff --git a/src/nvim/syntax.c b/src/nvim/syntax.c
index 5ce126a593..2e593e39de 100644
--- a/src/nvim/syntax.c
+++ b/src/nvim/syntax.c
@@ -2960,11 +2960,7 @@ static int check_keyword_id(
char_u *const kwp = line + startcol;
int kwlen = 0;
do {
- if (has_mbyte) {
- kwlen += (*mb_ptr2len)(kwp + kwlen);
- } else {
- kwlen++;
- }
+ kwlen += utfc_ptr2len(kwp + kwlen);
} while (vim_iswordp_buf(kwp + kwlen, syn_buf));
if (kwlen > MAXKEYWLEN) {
diff --git a/src/nvim/testdir/test_digraph.vim b/src/nvim/testdir/test_digraph.vim
index 1792dcc00b..9eea27740d 100644
--- a/src/nvim/testdir/test_digraph.vim
+++ b/src/nvim/testdir/test_digraph.vim
@@ -479,9 +479,6 @@ endfunc
func Test_show_digraph_cp1251()
throw 'skipped: Nvim supports ''utf8'' encoding only'
- if !has('multi_byte')
- return
- endif
new
set encoding=cp1251
call Put_Dig("='")
diff --git a/src/nvim/testdir/test_plus_arg_edit.vim b/src/nvim/testdir/test_plus_arg_edit.vim
index e91a6e467a..e31680e7b6 100644
--- a/src/nvim/testdir/test_plus_arg_edit.vim
+++ b/src/nvim/testdir/test_plus_arg_edit.vim
@@ -10,10 +10,6 @@ function Test_edit()
endfunction
func Test_edit_bad()
- if !has('multi_byte')
- finish
- endif
-
" Test loading a utf8 file with bad utf8 sequences.
call writefile(["[\xff][\xc0][\xe2\x89\xf0][\xc2\xc2]"], "Xfile")
new
diff --git a/src/nvim/testdir/test_search.vim b/src/nvim/testdir/test_search.vim
index 5db23c22a8..6824c50112 100644
--- a/src/nvim/testdir/test_search.vim
+++ b/src/nvim/testdir/test_search.vim
@@ -1053,7 +1053,7 @@ func Test_search_Ctrl_L_combining()
" ' ̇' U+0307 Dec:775 COMBINING DOT ABOVE &#x307; /\%u307\Z "\u0307"
" ' ̣' U+0323 Dec:803 COMBINING DOT BELOW &#x323; /\%u323 "\u0323"
" Those should also appear on the commandline
- if !has('multi_byte') || !exists('+incsearch')
+ if !exists('+incsearch')
return
endif
call Cmdline3_prep()
diff --git a/src/nvim/window.c b/src/nvim/window.c
index 3429e3df70..4078cd31ac 100644
--- a/src/nvim/window.c
+++ b/src/nvim/window.c
@@ -6151,11 +6151,7 @@ file_name_in_line (
// Skip over the "\" in "\ ".
++len;
}
- if (has_mbyte) {
- len += (size_t)(*mb_ptr2len)(ptr + len);
- } else {
- ++len;
- }
+ len += (size_t)(utfc_ptr2len(ptr + len));
}
/*