diff options
-rw-r--r-- | src/nvim/buffer.c | 73 | ||||
-rw-r--r-- | src/nvim/cursor.c | 8 | ||||
-rw-r--r-- | src/nvim/diff.c | 9 | ||||
-rw-r--r-- | src/nvim/edit.c | 156 | ||||
-rw-r--r-- | src/nvim/eval/funcs.c | 113 | ||||
-rw-r--r-- | src/nvim/ex_cmds.c | 32 | ||||
-rw-r--r-- | src/nvim/ex_docmd.c | 26 | ||||
-rw-r--r-- | src/nvim/ex_getln.c | 78 | ||||
-rw-r--r-- | src/nvim/fileio.c | 63 | ||||
-rw-r--r-- | src/nvim/fold.c | 10 | ||||
-rw-r--r-- | src/nvim/getchar.c | 181 | ||||
-rw-r--r-- | src/nvim/memline.c | 4 | ||||
-rw-r--r-- | src/nvim/message.c | 59 | ||||
-rw-r--r-- | src/nvim/move.c | 8 | ||||
-rw-r--r-- | src/nvim/option.c | 56 | ||||
-rw-r--r-- | src/nvim/path.c | 23 | ||||
-rw-r--r-- | src/nvim/regexp.c | 57 | ||||
-rw-r--r-- | src/nvim/regexp_nfa.c | 29 | ||||
-rw-r--r-- | src/nvim/search.c | 60 | ||||
-rw-r--r-- | src/nvim/spell.c | 575 | ||||
-rw-r--r-- | src/nvim/spellfile.c | 210 | ||||
-rw-r--r-- | src/nvim/strings.c | 10 | ||||
-rw-r--r-- | src/nvim/syntax.c | 6 | ||||
-rw-r--r-- | src/nvim/testdir/test_digraph.vim | 3 | ||||
-rw-r--r-- | src/nvim/testdir/test_plus_arg_edit.vim | 4 | ||||
-rw-r--r-- | src/nvim/testdir/test_search.vim | 2 | ||||
-rw-r--r-- | src/nvim/window.c | 6 |
27 files changed, 694 insertions, 1167 deletions
diff --git a/src/nvim/buffer.c b/src/nvim/buffer.c index 0ebe33f2f8..99cdde300d 100644 --- a/src/nvim/buffer.c +++ b/src/nvim/buffer.c @@ -3334,9 +3334,7 @@ void maketitle(void) len = (int)STRLEN(buf_p); if (len > 100) { len -= 100; - if (has_mbyte) { - len += (*mb_tail_off)(buf_p, buf_p + len) + 1; - } + len += (*mb_tail_off)(buf_p, buf_p + len) + 1; buf_p += len; } STRCPY(icon_str, buf_p); @@ -3661,17 +3659,12 @@ int build_stl_str_hl( // truncate by removing bytes from the start of the group text. if (group_len > stl_items[stl_groupitems[groupdepth]].maxwid) { // { Determine the number of bytes to remove - long n; - if (has_mbyte) { - // Find the first character that should be included. - n = 0; - while (group_len >= stl_items[stl_groupitems[groupdepth]].maxwid) { - group_len -= ptr2cells(t + n); - n += (*mb_ptr2len)(t + n); - } - } else { - n = (long)(out_p - t) - - stl_items[stl_groupitems[groupdepth]].maxwid + 1; + + // Find the first character that should be included. + long n = 0; + while (group_len >= stl_items[stl_groupitems[groupdepth]].maxwid) { + group_len -= ptr2cells(t + n); + n += (*mb_ptr2len)(t + n); } // } @@ -4183,13 +4176,10 @@ int build_stl_str_hl( // If the item is too wide, truncate it from the beginning if (l > maxwid) { - while (l >= maxwid) - if (has_mbyte) { - l -= ptr2cells(t); - t += (*mb_ptr2len)(t); - } else { - l -= byte2cells(*t++); - } + while (l >= maxwid) { + l -= ptr2cells(t); + t += utfc_ptr2len(t); + } // Early out if there isn't enough room for the truncation marker if (out_p >= out_end_p) { @@ -4372,26 +4362,19 @@ int build_stl_str_hl( // If the truncation point we found is beyond the maximum // length of the string, truncate the end of the string. if (width - vim_strsize(trunc_p) >= maxwidth) { - // If we are using a multi-byte encoding, walk from the beginning of the + // Walk from the beginning of the // string to find the last character that will fit. 
- if (has_mbyte) { - trunc_p = out; - width = 0; - for (;; ) { - width += ptr2cells(trunc_p); - if (width >= maxwidth) { - break; - } - - // Note: Only advance the pointer if the next - // character will fit in the available output space - trunc_p += (*mb_ptr2len)(trunc_p); + trunc_p = out; + width = 0; + for (;; ) { + width += ptr2cells(trunc_p); + if (width >= maxwidth) { + break; } - // Otherwise put the truncation point at the end, leaving enough room - // for a single-character truncation marker - } else { - trunc_p = out + maxwidth - 1; + // Note: Only advance the pointer if the next + // character will fit in the available output space + trunc_p += utfc_ptr2len(trunc_p); } // Ignore any items in the statusline that occur after @@ -4410,16 +4393,10 @@ int build_stl_str_hl( // Truncate at the truncation point we found } else { // { Determine how many bytes to remove - long trunc_len; - if (has_mbyte) { - trunc_len = 0; - while (width >= maxwidth) { - width -= ptr2cells(trunc_p + trunc_len); - trunc_len += (*mb_ptr2len)(trunc_p + trunc_len); - } - } else { - // Truncate an extra character so we can insert our `<`. - trunc_len = (width - maxwidth) + 1; + long trunc_len = 0; + while (width >= maxwidth) { + width -= ptr2cells(trunc_p + trunc_len); + trunc_len += utfc_ptr2len(trunc_p + trunc_len); } // } diff --git a/src/nvim/cursor.c b/src/nvim/cursor.c index d3ffab1759..74a6f77a6d 100644 --- a/src/nvim/cursor.c +++ b/src/nvim/cursor.c @@ -242,9 +242,7 @@ static int coladvance2( } // Prevent from moving onto a trail byte. - if (has_mbyte) { - mark_mb_adjustpos(curbuf, pos); - } + mark_mb_adjustpos(curbuf, pos); if (wcol < 0 || col < wcol) { return FAIL; @@ -378,9 +376,7 @@ void check_cursor_col_win(win_T *win) } else { win->w_cursor.col = len - 1; // Move the cursor to the head byte. 
- if (has_mbyte) { - mark_mb_adjustpos(win->w_buffer, &win->w_cursor); - } + mark_mb_adjustpos(win->w_buffer, &win->w_cursor); } } else if (win->w_cursor.col < 0) { win->w_cursor.col = 0; diff --git a/src/nvim/diff.c b/src/nvim/diff.c index b9c293f6c8..1cdf84f9d0 100644 --- a/src/nvim/diff.c +++ b/src/nvim/diff.c @@ -719,15 +719,12 @@ static int diff_write_buffer(buf_T *buf, diffin_T *din) for (lnum = 1; lnum <= buf->b_ml.ml_line_count; lnum++) { for (s = ml_get_buf(buf, lnum, false); *s != NUL; ) { if (diff_flags & DIFF_ICASE) { - int c; - - // xdiff doesn't support ignoring case, fold-case the text. - int orig_len; char_u cbuf[MB_MAXBYTES + 1]; - c = PTR2CHAR(s); + // xdiff doesn't support ignoring case, fold-case the text. + int c = PTR2CHAR(s); c = utf_fold(c); - orig_len = utfc_ptr2len(s); + const int orig_len = utfc_ptr2len(s); if (utf_char2bytes(c, cbuf) != orig_len) { // TODO(Bram): handle byte length difference memmove(ptr + len, s, orig_len); diff --git a/src/nvim/edit.c b/src/nvim/edit.c index 9c8d64a6b2..5d44c3274e 100644 --- a/src/nvim/edit.c +++ b/src/nvim/edit.c @@ -426,9 +426,9 @@ static void insert_enter(InsertState *s) || curwin->w_curswant > curwin->w_virtcol) && *(s->ptr = get_cursor_line_ptr() + curwin->w_cursor.col) != NUL) { if (s->ptr[1] == NUL) { - ++curwin->w_cursor.col; - } else if (has_mbyte) { - s->i = (*mb_ptr2len)(s->ptr); + curwin->w_cursor.col++; + } else { + s->i = utfc_ptr2len(s->ptr); if (s->ptr[s->i] == NUL) { curwin->w_cursor.col += s->i; } @@ -1299,11 +1299,10 @@ normalchar: // special character. Let CTRL-] expand abbreviations without // inserting it. if (vim_iswordc(s->c) - || (!echeck_abbr( // Add ABBR_OFF for characters above 0x100, this is // what check_abbr() expects. - (has_mbyte && s->c >= 0x100) ? (s->c + ABBR_OFF) : s->c) - && s->c != Ctrl_RSB)) { + || (!echeck_abbr((s->c >= 0x100) ? 
(s->c + ABBR_OFF) : s->c) + && s->c != Ctrl_RSB)) { insert_special(s->c, false, false); revins_legal++; revins_chars++; @@ -1574,14 +1573,12 @@ void edit_putchar(int c, bool highlight) pc_status = PC_STATUS_UNSET; if (curwin->w_p_rl) { pc_col += curwin->w_grid.Columns - 1 - curwin->w_wcol; - if (has_mbyte) { - int fix_col = grid_fix_col(&curwin->w_grid, pc_col, pc_row); + const int fix_col = grid_fix_col(&curwin->w_grid, pc_col, pc_row); - if (fix_col != pc_col) { - grid_putchar(&curwin->w_grid, ' ', pc_row, fix_col, attr); - curwin->w_wcol--; - pc_status = PC_STATUS_RIGHT; - } + if (fix_col != pc_col) { + grid_putchar(&curwin->w_grid, ' ', pc_row, fix_col, attr); + curwin->w_wcol--; + pc_status = PC_STATUS_RIGHT; } } else { pc_col += curwin->w_wcol; @@ -1817,10 +1814,11 @@ change_indent ( ptr = get_cursor_line_ptr(); while (vcol <= (int)curwin->w_virtcol) { last_vcol = vcol; - if (has_mbyte && new_cursor_col >= 0) - new_cursor_col += (*mb_ptr2len)(ptr + new_cursor_col); - else - ++new_cursor_col; + if (new_cursor_col >= 0) { + new_cursor_col += utfc_ptr2len(ptr + new_cursor_col); + } else { + new_cursor_col++; + } vcol += lbr_chartabsize(ptr, ptr + new_cursor_col, (colnr_T)vcol); } vcol = last_vcol; @@ -1975,7 +1973,7 @@ void backspace_until_column(int col) /// @return true when something was deleted. static bool del_char_after_col(int limit_col) { - if (enc_utf8 && limit_col >= 0) { + if (limit_col >= 0) { colnr_T ecol = curwin->w_cursor.col + 1; // Make sure the cursor is at the start of a character, but @@ -2174,15 +2172,14 @@ int ins_compl_add_infercase(char_u *str_arg, int len, bool icase, char_u *fname, // Infer case of completed part. // Find actual length of completion. - if (has_mbyte) { + { const char_u *p = str; actual_len = 0; while (*p != NUL) { MB_PTR_ADV(p); actual_len++; } - } else - actual_len = len; + } // Find actual length of original text. 
{ @@ -2204,11 +2201,7 @@ int ins_compl_add_infercase(char_u *str_arg, int len, bool icase, char_u *fname, { const char_u *p = str; for (i = 0; i < actual_len; i++) { - if (has_mbyte) { - wca[i] = mb_ptr2char_adv(&p); - } else { - wca[i] = *(p++); - } + wca[i] = mb_ptr2char_adv(&p); } } @@ -2216,11 +2209,7 @@ int ins_compl_add_infercase(char_u *str_arg, int len, bool icase, char_u *fname, { const char_u *p = compl_orig_text; for (i = 0; i < min_len; i++) { - if (has_mbyte) { - c = mb_ptr2char_adv(&p); - } else { - c = *(p++); - } + c = mb_ptr2char_adv(&p); if (mb_islower(c)) { has_lower = true; if (mb_isupper(wca[i])) { @@ -2241,11 +2230,7 @@ int ins_compl_add_infercase(char_u *str_arg, int len, bool icase, char_u *fname, if (!has_lower) { const char_u *p = compl_orig_text; for (i = 0; i < min_len; i++) { - if (has_mbyte) { - c = mb_ptr2char_adv(&p); - } else { - c = *(p++); - } + c = mb_ptr2char_adv(&p); if (was_letter && mb_isupper(c) && mb_islower(wca[i])) { // Rule 2 is satisfied. for (i = actual_compl_length; i < actual_len; i++) { @@ -2261,11 +2246,7 @@ int ins_compl_add_infercase(char_u *str_arg, int len, bool icase, char_u *fname, { const char_u *p = compl_orig_text; for (i = 0; i < min_len; i++) { - if (has_mbyte) { - c = mb_ptr2char_adv(&p); - } else { - c = *(p++); - } + c = mb_ptr2char_adv(&p); if (mb_islower(c)) { wca[i] = mb_tolower(wca[i]); } else if (mb_isupper(c)) { @@ -3059,12 +3040,9 @@ static void ins_compl_files(int count, char_u **files, int thesaurus, int flags, */ char_u *find_word_start(char_u *ptr) { - if (has_mbyte) - while (*ptr != NUL && *ptr != '\n' && mb_get_class(ptr) <= 1) - ptr += (*mb_ptr2len)(ptr); - else - while (*ptr != NUL && *ptr != '\n' && !vim_iswordc(*ptr)) - ++ptr; + while (*ptr != NUL && *ptr != '\n' && mb_get_class(ptr) <= 1) { + ptr += utfc_ptr2len(ptr); + } return ptr; } @@ -3074,19 +3052,15 @@ char_u *find_word_start(char_u *ptr) */ char_u *find_word_end(char_u *ptr) { - int start_class; - - if (has_mbyte) { - 
start_class = mb_get_class(ptr); - if (start_class > 1) - while (*ptr != NUL) { - ptr += (*mb_ptr2len)(ptr); - if (mb_get_class(ptr) != start_class) - break; + const int start_class = mb_get_class(ptr); + if (start_class > 1) { + while (*ptr != NUL) { + ptr += utfc_ptr2len(ptr); + if (mb_get_class(ptr) != start_class) { + break; } - } else - while (vim_iswordc(*ptr)) - ++ptr; + } + } return ptr; } @@ -5557,10 +5531,9 @@ static void insert_special(int c, int allow_modmask, int ctrlv) */ # define ISSPECIAL(c) ((c) < ' ' || (c) >= DEL || (c) == '0' || (c) == '^') -# define WHITECHAR(cc) (ascii_iswhite(cc) && \ - (!enc_utf8 || \ - !utf_iscomposing( \ - utf_ptr2char(get_cursor_pos_ptr() + 1)))) +#define WHITECHAR(cc) ( \ + ascii_iswhite(cc) \ + && !utf_iscomposing(utf_ptr2char(get_cursor_pos_ptr() + 1))) /* * "flags": INSCHAR_FORMAT - force formatting @@ -5697,7 +5670,7 @@ void insertchar( // Do the check for InsertCharPre before the call to vpeekc() because the // InsertCharPre autocommand could change the input buffer. 
if (!ISSPECIAL(c) - && (!has_mbyte || (*mb_char2len)(c) == 1) + && (utf_char2len(c) == 1) && !has_event(EVENT_INSERTCHARPRE) && vpeekc() != NUL && !(State & REPLACE_FLAG) @@ -7175,16 +7148,11 @@ static void replace_do_bs(int limit_col) getvcol(curwin, &curwin->w_cursor, NULL, &start_vcol, NULL); orig_vcols = chartabsize(get_cursor_pos_ptr(), start_vcol); } - if (has_mbyte) { - (void)del_char_after_col(limit_col); - if (l_State & VREPLACE_FLAG) - orig_len = (int)STRLEN(get_cursor_pos_ptr()); - replace_push(cc); - } else { - pchar_cursor(cc); - if (l_State & VREPLACE_FLAG) - orig_len = (int)STRLEN(get_cursor_pos_ptr()) - 1; + (void)del_char_after_col(limit_col); + if (l_State & VREPLACE_FLAG) { + orig_len = (int)STRLEN(get_cursor_pos_ptr()); } + replace_push(cc); replace_pop_ins(); if (l_State & VREPLACE_FLAG) { @@ -7403,23 +7371,17 @@ bool in_cinkeys(int keytyped, int when, bool line_is_empty) bool match = false; if (keytyped == KEY_COMPLETE) { - char_u *s; + char_u *n, *s; /* Just completed a word, check if it starts with "look". * search back for the start of a word. */ line = get_cursor_line_ptr(); - if (has_mbyte) { - char_u *n; - - for (s = line + curwin->w_cursor.col; s > line; s = n) { - n = mb_prevptr(line, s); - if (!vim_iswordp(n)) - break; + for (s = line + curwin->w_cursor.col; s > line; s = n) { + n = mb_prevptr(line, s); + if (!vim_iswordp(n)) { + break; } - } else - for (s = line + curwin->w_cursor.col; s > line; --s) - if (!vim_iswordc(s[-1])) - break; + } assert(p >= look && (uintmax_t)(p - look) <= SIZE_MAX); if (s + (p - look) <= line + curwin->w_cursor.col && (icase @@ -8255,10 +8217,8 @@ static bool ins_bs(int c, int mode, int *inserted_space_p) } cc = gchar_cursor(); // look multi-byte character class - if (has_mbyte) { - prev_cclass = cclass; - cclass = mb_get_class(get_cursor_pos_ptr()); - } + prev_cclass = cclass; + cclass = mb_get_class(get_cursor_pos_ptr()); if (mode == BACKSPACE_WORD && !ascii_isspace(cc)) { // start of word? 
mode = BACKSPACE_WORD_NOT_SPACE; temp = vim_iswordc(cc); @@ -8272,19 +8232,18 @@ static bool ins_bs(int c, int mode, int *inserted_space_p) } break; } - if (State & REPLACE_FLAG) + if (State & REPLACE_FLAG) { replace_do_bs(-1); - else { - const bool l_enc_utf8 = enc_utf8; + } else { const int l_p_deco = p_deco; - if (l_enc_utf8 && l_p_deco) { + if (l_p_deco) { (void)utfc_ptr2char(get_cursor_pos_ptr(), cpc); } (void)del_char(false); // If there are combining characters and 'delcombine' is set // move the cursor back. Don't back up before the base // character. - if (l_enc_utf8 && l_p_deco && cpc[0] != NUL) { + if (l_p_deco && cpc[0] != NUL) { inc_cursor(); } if (revins_chars) { @@ -8522,13 +8481,10 @@ static void ins_right(void) AppendCharToRedobuff(K_RIGHT); } curwin->w_set_curswant = true; - if (virtual_active()) + if (virtual_active()) { oneright(); - else { - if (has_mbyte) - curwin->w_cursor.col += (*mb_ptr2len)(get_cursor_pos_ptr()); - else - ++curwin->w_cursor.col; + } else { + curwin->w_cursor.col += utfc_ptr2len(get_cursor_pos_ptr()); } revins_legal++; diff --git a/src/nvim/eval/funcs.c b/src/nvim/eval/funcs.c index 679548ab91..eee40965e0 100644 --- a/src/nvim/eval/funcs.c +++ b/src/nvim/eval/funcs.c @@ -783,10 +783,10 @@ static void byteidx(typval_T *argvars, typval_T *rettv, int comp) if (*t == NUL) { // EOL reached. return; } - if (enc_utf8 && comp) { + if (comp) { t += utf_ptr2len((const char_u *)t); } else { - t += (*mb_ptr2len)((const char_u *)t); + t += utfc_ptr2len((const char_u *)t); } } rettv->vval.v_number = (varnumber_T)(t - str); @@ -1427,9 +1427,7 @@ static void f_cursor(typval_T *argvars, typval_T *rettv, FunPtr fptr) // Make sure the cursor is in a valid position. check_cursor(); // Correct cursor for multi-byte character. 
- if (has_mbyte) { - mb_adjust_cursor(); - } + mb_adjust_cursor(); curwin->w_set_curswant = set_curswant; rettv->vval.v_number = 0; @@ -4198,7 +4196,7 @@ static void f_has(typval_T *argvars, typval_T *rettv, FunPtr fptr) } else if (STRICMP(name, "ttyout") == 0) { n = stdout_isatty; } else if (STRICMP(name, "multi_byte_encoding") == 0) { - n = has_mbyte != 0; + n = true; } else if (STRICMP(name, "syntax_items") == 0) { n = syntax_present(curwin); #ifdef UNIX @@ -8025,14 +8023,9 @@ static void f_setcharsearch(typval_T *argvars, typval_T *rettv, FunPtr fptr) if ((d = argvars[0].vval.v_dict) != NULL) { char_u *const csearch = (char_u *)tv_dict_get_string(d, "char", false); if (csearch != NULL) { - if (enc_utf8) { - int pcc[MAX_MCO]; - int c = utfc_ptr2char(csearch, pcc); - set_last_csearch(c, csearch, utfc_ptr2len(csearch)); - } - else - set_last_csearch(PTR2CHAR(csearch), - csearch, utfc_ptr2len(csearch)); + int pcc[MAX_MCO]; + const int c = utfc_ptr2char(csearch, pcc); + set_last_csearch(c, csearch, utfc_ptr2len(csearch)); } di = tv_dict_find(d, S_LEN("forward")); @@ -10711,72 +10704,54 @@ static void f_tr(typval_T *argvars, typval_T *rettv, FunPtr fptr) garray_T ga; ga_init(&ga, (int)sizeof(char), 80); - if (!has_mbyte) { - // Not multi-byte: fromstr and tostr must be the same length. - if (strlen(fromstr) != strlen(tostr)) { - goto error; - } - } - // fromstr and tostr have to contain the same number of chars. 
bool first = true; while (*in_str != NUL) { - if (has_mbyte) { - const char *cpstr = in_str; - const int inlen = (*mb_ptr2len)((const char_u *)in_str); - int cplen = inlen; - int idx = 0; - int fromlen; - for (const char *p = fromstr; *p != NUL; p += fromlen) { - fromlen = (*mb_ptr2len)((const char_u *)p); - if (fromlen == inlen && STRNCMP(in_str, p, inlen) == 0) { - int tolen; - for (p = tostr; *p != NUL; p += tolen) { - tolen = (*mb_ptr2len)((const char_u *)p); - if (idx-- == 0) { - cplen = tolen; - cpstr = (char *)p; - break; - } - } - if (*p == NUL) { // tostr is shorter than fromstr. - goto error; - } - break; - } - idx++; - } - - if (first && cpstr == in_str) { - // Check that fromstr and tostr have the same number of - // (multi-byte) characters. Done only once when a character - // of in_str doesn't appear in fromstr. - first = false; + const char *cpstr = in_str; + const int inlen = utfc_ptr2len((const char_u *)in_str); + int cplen = inlen; + int idx = 0; + int fromlen; + for (const char *p = fromstr; *p != NUL; p += fromlen) { + fromlen = utfc_ptr2len((const char_u *)p); + if (fromlen == inlen && STRNCMP(in_str, p, inlen) == 0) { int tolen; - for (const char *p = tostr; *p != NUL; p += tolen) { - tolen = (*mb_ptr2len)((const char_u *)p); - idx--; + for (p = tostr; *p != NUL; p += tolen) { + tolen = utfc_ptr2len((const char_u *)p); + if (idx-- == 0) { + cplen = tolen; + cpstr = (char *)p; + break; + } } - if (idx != 0) { + if (*p == NUL) { // tostr is shorter than fromstr. goto error; } + break; } + idx++; + } - ga_grow(&ga, cplen); - memmove((char *)ga.ga_data + ga.ga_len, cpstr, (size_t)cplen); - ga.ga_len += cplen; - - in_str += inlen; - } else { - // When not using multi-byte chars we can do it faster. 
- const char *const p = strchr(fromstr, *in_str); - if (p != NULL) { - ga_append(&ga, tostr[p - fromstr]); - } else { - ga_append(&ga, *in_str); + if (first && cpstr == in_str) { + // Check that fromstr and tostr have the same number of + // (multi-byte) characters. Done only once when a character + // of in_str doesn't appear in fromstr. + first = false; + int tolen; + for (const char *p = tostr; *p != NUL; p += tolen) { + tolen = utfc_ptr2len((const char_u *)p); + idx--; + } + if (idx != 0) { + goto error; } - in_str++; } + + ga_grow(&ga, cplen); + memmove((char *)ga.ga_data + ga.ga_len, cpstr, (size_t)cplen); + ga.ga_len += cplen; + + in_str += inlen; } // add a terminating NUL diff --git a/src/nvim/ex_cmds.c b/src/nvim/ex_cmds.c index 17afb33059..b8a0c3184b 100644 --- a/src/nvim/ex_cmds.c +++ b/src/nvim/ex_cmds.c @@ -795,10 +795,7 @@ void ex_retab(exarg_T *eap) if (ptr[col] == NUL) break; vcol += chartabsize(ptr + col, (colnr_T)vcol); - if (has_mbyte) - col += (*mb_ptr2len)(ptr + col); - else - ++col; + col += utfc_ptr2len(ptr + col); } if (new_line == NULL) /* out of memory */ break; @@ -3465,7 +3462,7 @@ static buf_T *do_sub(exarg_T *eap, proftime_T timeout, int lastone; long nmatch_tl = 0; // nr of lines matched below lnum int do_again; // do it again after joining lines - int skip_match = false; + bool skip_match = false; linenr_T sub_firstlnum; // nr of first sub line /* @@ -3576,16 +3573,13 @@ static buf_T *do_sub(exarg_T *eap, proftime_T timeout, if (matchcol == prev_matchcol && regmatch.endpos[0].lnum == 0 && matchcol == regmatch.endpos[0].col) { - if (sub_firstline[matchcol] == NUL) - /* We already were at the end of the line. Don't look - * for a match in this line again. */ - skip_match = TRUE; - else { - /* search for a match at next column */ - if (has_mbyte) - matchcol += mb_ptr2len(sub_firstline + matchcol); - else - ++matchcol; + if (sub_firstline[matchcol] == NUL) { + // We already were at the end of the line. 
Don't look + // for a match in this line again. + skip_match = true; + } else { + // search for a match at next column + matchcol += mb_ptr2len(sub_firstline + matchcol); } // match will be pushed to preview_lines, bring it into a proper state current_match.start.col = matchcol; @@ -3609,7 +3603,7 @@ static buf_T *do_sub(exarg_T *eap, proftime_T timeout, if (nmatch > 1) { matchcol = (colnr_T)STRLEN(sub_firstline); nmatch = 1; - skip_match = TRUE; + skip_match = true; } sub_nsubs++; did_sub = TRUE; @@ -3779,7 +3773,7 @@ static buf_T *do_sub(exarg_T *eap, proftime_T timeout, * get stuck when pressing 'n'. */ if (nmatch > 1) { matchcol = (colnr_T)STRLEN(sub_firstline); - skip_match = TRUE; + skip_match = true; } goto skip; } @@ -3956,8 +3950,8 @@ static buf_T *do_sub(exarg_T *eap, proftime_T timeout, STRMOVE(new_start, p1 + 1); p1 = new_start - 1; } - } else if (has_mbyte) { - p1 += (*mb_ptr2len)(p1) - 1; + } else { + p1 += utfc_ptr2len(p1) - 1; } } size_t new_endcol = STRLEN(new_start); diff --git a/src/nvim/ex_docmd.c b/src/nvim/ex_docmd.c index a491a9d377..fc699e8826 100644 --- a/src/nvim/ex_docmd.c +++ b/src/nvim/ex_docmd.c @@ -2505,12 +2505,8 @@ static void append_command(char_u *cmd) STRCAT(IObuff, ": "); d = IObuff + STRLEN(IObuff); while (*s != NUL && d - IObuff < IOSIZE - 7) { - if ( - enc_utf8 ? (s[0] == 0xc2 && s[1] == 0xa0) : - *s == 0xa0) { - s += - enc_utf8 ? 2 : - 1; + if (s[0] == 0xc2 && s[1] == 0xa0) { + s += 2; STRCPY(d, "<a0>"); d += 4; } else @@ -5564,7 +5560,8 @@ static char_u *uc_split_args(char_u *arg, size_t *lenp) break; len += 3; /* "," */ } else { - int charlen = (*mb_ptr2len)(p); + const int charlen = utfc_ptr2len(p); + len += charlen; p += charlen; } @@ -8266,12 +8263,10 @@ static void ex_normal(exarg_T *eap) return; } - /* - * vgetc() expects a CSI and K_SPECIAL to have been escaped. Don't do - * this for the K_SPECIAL leading byte, otherwise special keys will not - * work. 
- */ - if (has_mbyte) { + // vgetc() expects a CSI and K_SPECIAL to have been escaped. Don't do + // this for the K_SPECIAL leading byte, otherwise special keys will not + // work. + { int len = 0; /* Count the number of characters to be escaped. */ @@ -8310,9 +8305,8 @@ static void ex_normal(exarg_T *eap) check_cursor_moved(curwin); } - exec_normal_cmd( - arg != NULL ? arg : - eap->arg, eap->forceit ? REMAP_NONE : REMAP_YES, FALSE); + exec_normal_cmd(arg != NULL ? arg : eap->arg, + eap->forceit ? REMAP_NONE : REMAP_YES, false); } while (eap->addr_count > 0 && eap->line1 <= eap->line2 && !got_int); } diff --git a/src/nvim/ex_getln.c b/src/nvim/ex_getln.c index 53feffd2d7..9edb826ea6 100644 --- a/src/nvim/ex_getln.c +++ b/src/nvim/ex_getln.c @@ -2096,7 +2096,7 @@ static int command_line_handle_key(CommandLineState *s) s->do_abbr = false; // don't do abbreviation now ccline.special_char = NUL; // may need to remove ^ when composing char was typed - if (enc_utf8 && utf_iscomposing(s->c) && !cmd_silent) { + if (utf_iscomposing(s->c) && !cmd_silent) { if (ui_has(kUICmdline)) { // TODO(bfredl): why not make unputcmdline also work with true? unputcmdline(); @@ -2143,9 +2143,8 @@ static int command_line_handle_key(CommandLineState *s) if (s->do_abbr && (IS_SPECIAL(s->c) || !vim_iswordc(s->c)) // Add ABBR_OFF for characters above 0x100, this is // what check_abbr() expects. - && (ccheck_abbr((has_mbyte && s->c >= 0x100) ? - (s->c + ABBR_OFF) : s->c) - || s->c == Ctrl_RSB)) { + && (ccheck_abbr((s->c >= 0x100) ? (s->c + ABBR_OFF) : s->c) + || s->c == Ctrl_RSB)) { return command_line_changed(s); } @@ -2254,7 +2253,7 @@ static int command_line_changed(CommandLineState *s) may_do_incsearch_highlighting(s->firstc, s->count, &s->is_state); } - if (cmdmsg_rl || (p_arshape && !p_tbidi && enc_utf8)) { + if (cmdmsg_rl || (p_arshape && !p_tbidi)) { // Always redraw the whole command line to fix shaping and // right-left typing. Not efficient, but it works. 
// Do it only when there are no characters left to read @@ -3139,11 +3138,9 @@ static void draw_cmdline(int start, int len) if (cmdline_star > 0) { for (int i = 0; i < len; i++) { msg_putchar('*'); - if (has_mbyte) { - i += (*mb_ptr2len)(ccline.cmdbuff + start + i) - 1; - } + i += utfc_ptr2len(ccline.cmdbuff + start + i) - 1; } - } else if (p_arshape && !p_tbidi && enc_utf8 && len > 0) { + } else if (p_arshape && !p_tbidi && len > 0) { bool do_arabicshape = false; int mb_l; for (int i = start; i < start + len; i += mb_l) { @@ -3439,32 +3436,31 @@ void put_on_cmdline(char_u *str, int len, int redraw) (size_t)(ccline.cmdlen - ccline.cmdpos)); ccline.cmdlen += len; } else { - if (has_mbyte) { - /* Count nr of characters in the new string. */ - m = 0; - for (i = 0; i < len; i += (*mb_ptr2len)(str + i)) - ++m; - /* Count nr of bytes in cmdline that are overwritten by these - * characters. */ - for (i = ccline.cmdpos; i < ccline.cmdlen && m > 0; - i += (*mb_ptr2len)(ccline.cmdbuff + i)) - --m; - if (i < ccline.cmdlen) { - memmove(ccline.cmdbuff + ccline.cmdpos + len, - ccline.cmdbuff + i, (size_t)(ccline.cmdlen - i)); - ccline.cmdlen += ccline.cmdpos + len - i; - } else - ccline.cmdlen = ccline.cmdpos + len; - } else if (ccline.cmdpos + len > ccline.cmdlen) + // Count nr of characters in the new string. + m = 0; + for (i = 0; i < len; i += utfc_ptr2len(str + i)) { + m++; + } + // Count nr of bytes in cmdline that are overwritten by these + // characters. 
+ for (i = ccline.cmdpos; i < ccline.cmdlen && m > 0; + i += utfc_ptr2len(ccline.cmdbuff + i)) { + m--; + } + if (i < ccline.cmdlen) { + memmove(ccline.cmdbuff + ccline.cmdpos + len, + ccline.cmdbuff + i, (size_t)(ccline.cmdlen - i)); + ccline.cmdlen += ccline.cmdpos + len - i; + } else { ccline.cmdlen = ccline.cmdpos + len; + } } memmove(ccline.cmdbuff + ccline.cmdpos, str, (size_t)len); ccline.cmdbuff[ccline.cmdlen] = NUL; - if (enc_utf8) { - /* When the inserted text starts with a composing character, - * backup to the character before it. There could be two of them. - */ + { + // When the inserted text starts with a composing character, + // backup to the character before it. There could be two of them. i = 0; c = utf_ptr2char(ccline.cmdbuff + ccline.cmdpos); while (ccline.cmdpos > 0 && utf_iscomposing(c)) { @@ -3515,23 +3511,19 @@ void put_on_cmdline(char_u *str, int len, int redraw) for (i = 0; i < len; i++) { c = cmdline_charsize(ccline.cmdpos); // count ">" for a double-wide char that doesn't fit. - if (has_mbyte) { - correct_screencol(ccline.cmdpos, c, &ccline.cmdspos); - } + correct_screencol(ccline.cmdpos, c, &ccline.cmdspos); // Stop cursor at the end of the screen, but do increment the // insert position, so that entering a very long command // works, even though you can't see it. 
if (ccline.cmdspos + c < m) { ccline.cmdspos += c; } - if (has_mbyte) { - c = (*mb_ptr2len)(ccline.cmdbuff + ccline.cmdpos) - 1; - if (c > len - i - 1) { - c = len - i - 1; - } - ccline.cmdpos += c; - i += c; + c = utfc_ptr2len(ccline.cmdbuff + ccline.cmdpos) - 1; + if (c > len - i - 1) { + c = len - i - 1; } + ccline.cmdpos += c; + i += c; ccline.cmdpos++; } @@ -3676,11 +3668,7 @@ void cmdline_paste_str(char_u *s, int literally) if (cv == Ctrl_V && s[1]) { s++; } - if (has_mbyte) { - c = mb_cptr2char_adv((const char_u **)&s); - } else { - c = *s++; - } + c = mb_cptr2char_adv((const char_u **)&s); if (cv == Ctrl_V || c == ESC || c == Ctrl_C || c == CAR || c == NL || c == Ctrl_L || (c == Ctrl_BSL && *s == Ctrl_N)) { diff --git a/src/nvim/fileio.c b/src/nvim/fileio.c index e349f00fba..49de0f3bf8 100644 --- a/src/nvim/fileio.c +++ b/src/nvim/fileio.c @@ -910,20 +910,18 @@ retry: /* "ucs-bom" means we need to check the first bytes of the file * for a BOM. */ - if (STRCMP(fenc, ENC_UCSBOM) == 0) + if (STRCMP(fenc, ENC_UCSBOM) == 0) { fio_flags = FIO_UCSBOM; - - /* - * Check if UCS-2/4 or Latin1 to UTF-8 conversion needs to be - * done. This is handled below after read(). Prepare the - * fio_flags to avoid having to parse the string each time. - * Also check for Unicode to Latin1 conversion, because iconv() - * appears not to handle this correctly. This works just like - * conversion to UTF-8 except how the resulting character is put in - * the buffer. - */ - else if (enc_utf8 || STRCMP(p_enc, "latin1") == 0) + } else { + // Check if UCS-2/4 or Latin1 to UTF-8 conversion needs to be + // done. This is handled below after read(). Prepare the + // fio_flags to avoid having to parse the string each time. + // Also check for Unicode to Latin1 conversion, because iconv() + // appears not to handle this correctly. This works just like + // conversion to UTF-8 except how the resulting character is put in + // the buffer. 
fio_flags = get_fio_flags(fenc); + } @@ -932,8 +930,7 @@ retry: if (fio_flags == 0 && !did_iconv ) { - iconv_fd = (iconv_t)my_iconv_open( - enc_utf8 ? (char_u *)"utf-8" : p_enc, fenc); + iconv_fd = (iconv_t)my_iconv_open((char_u *)"utf-8", fenc); } # endif @@ -1202,7 +1199,7 @@ retry: && (fio_flags == FIO_UCSBOM || (!curbuf->b_p_bomb && tmpname == NULL - && (*fenc == 'u' || (*fenc == NUL && enc_utf8))))) { + && (*fenc == 'u' || *fenc == NUL)))) { char_u *ccname; int blen; @@ -1468,8 +1465,8 @@ retry: memmove(line_start, buffer, (size_t)linerest); size = (long)((ptr + real_size) - dest); ptr = dest; - } else if (enc_utf8 && !curbuf->b_p_bin) { - int incomplete_tail = FALSE; + } else if (!curbuf->b_p_bin) { + bool incomplete_tail = false; // Reading UTF-8: Check if the bytes are valid UTF-8. for (p = ptr;; p++) { @@ -1486,15 +1483,16 @@ retry: // then. l = utf_ptr2len_len(p, todo); if (l > todo && !incomplete_tail) { - /* Avoid retrying with a different encoding when - * a truncated file is more likely, or attempting - * to read the rest of an incomplete sequence when - * we have already done so. */ - if (p > ptr || filesize > 0) - incomplete_tail = TRUE; - /* Incomplete byte sequence, move it to conv_rest[] - * and try to read the rest of it, unless we've - * already done so. */ + // Avoid retrying with a different encoding when + // a truncated file is more likely, or attempting + // to read the rest of an incomplete sequence when + // we have already done so. + if (p > ptr || filesize > 0) { + incomplete_tail = true; + } + // Incomplete byte sequence, move it to conv_rest[] + // and try to read the rest of it, unless we've + // already done so. if (p > ptr) { conv_restlen = todo; memmove(conv_rest, p, conv_restlen); @@ -2165,8 +2163,8 @@ readfile_charconvert ( else { close(*fdp); /* close the input file, ignore errors */ *fdp = -1; - if (eval_charconvert((char *) fenc, enc_utf8 ? 
"utf-8" : (char *) p_enc, - (char *) fname, (char *) tmpname) == FAIL) { + if (eval_charconvert((char *)fenc, "utf-8", + (char *)fname, (char *)tmpname) == FAIL) { errmsg = (char_u *)_("Conversion with 'charconvert' failed"); } if (errmsg == NULL && (*fdp = os_open((char *)tmpname, O_RDONLY, 0)) < 0) { @@ -3067,7 +3065,7 @@ nobackup: // Check if UTF-8 to UCS-2/4 or Latin1 conversion needs to be done. Or // Latin1 to Unicode conversion. This is handled in buf_write_bytes(). // Prepare the flags for it and allocate bw_conv_buf when needed. - if (converted && (enc_utf8 || STRCMP(p_enc, "latin1") == 0)) { + if (converted) { wb_flags = get_fio_flags(fenc); if (wb_flags & (FIO_UCS2 | FIO_UCS4 | FIO_UTF16 | FIO_UTF8)) { // Need to allocate a buffer to translate into. @@ -3089,8 +3087,7 @@ nobackup: # ifdef HAVE_ICONV // Use iconv() conversion when conversion is needed and it's not done // internally. - write_info.bw_iconv_fd = (iconv_t)my_iconv_open(fenc, - enc_utf8 ? (char_u *)"utf-8" : p_enc); + write_info.bw_iconv_fd = (iconv_t)my_iconv_open(fenc, (char_u *)"utf-8"); if (write_info.bw_iconv_fd != (iconv_t)-1) { /* We're going to use iconv(), allocate a buffer to convert in. */ write_info.bw_conv_buflen = bufsize * ICONV_MULT; @@ -3433,7 +3430,7 @@ restore_backup: // The file was written to a temp file, now it needs to be converted // with 'charconvert' to (overwrite) the output file. if (end != 0) { - if (eval_charconvert(enc_utf8 ? "utf-8" : (char *)p_enc, (char *)fenc, + if (eval_charconvert("utf-8", (char *)fenc, (char *)wfname, (char *)fname) == FAIL) { write_info.bw_conv_error = true; end = 0; @@ -4189,7 +4186,7 @@ static bool need_conversion(const char_u *fenc) /* Encodings differ. However, conversion is not needed when 'enc' is any * Unicode encoding and the file is UTF-8. 
*/ - return !(enc_utf8 && fenc_flags == FIO_UTF8); + return !(fenc_flags == FIO_UTF8); } /// Return the FIO_ flags needed for the internal conversion if 'name' was diff --git a/src/nvim/fold.c b/src/nvim/fold.c index 24a73a5b9f..5e28ca6538 100644 --- a/src/nvim/fold.c +++ b/src/nvim/fold.c @@ -1037,11 +1037,11 @@ void foldAdjustVisual(void) if (hasFolding(end->lnum, NULL, &end->lnum)) { ptr = ml_get(end->lnum); end->col = (colnr_T)STRLEN(ptr); - if (end->col > 0 && *p_sel == 'o') - --end->col; - /* prevent cursor from moving on the trail byte */ - if (has_mbyte) - mb_adjust_cursor(); + if (end->col > 0 && *p_sel == 'o') { + end->col--; + } + // prevent cursor from moving on the trail byte + mb_adjust_cursor(); } } diff --git a/src/nvim/getchar.c b/src/nvim/getchar.c index 456979be00..a5c81b2795 100644 --- a/src/nvim/getchar.c +++ b/src/nvim/getchar.c @@ -563,9 +563,7 @@ void AppendToRedobuffLit(const char_u *str, int len) // Handle a special or multibyte character. // Composing chars separately are handled separately. - const int c = (has_mbyte - ? mb_cptr2char_adv((const char_u **)&s) - : (uint8_t)(*s++)); + const int c = mb_cptr2char_adv((const char_u **)&s); if (c < ' ' || c == DEL || (*s == NUL && (c == '0' || c == '^'))) { add_char_buff(&redobuff, Ctrl_V); } @@ -684,15 +682,16 @@ static int read_redo(bool init, bool old_redo) if ((c = *p) == NUL) { return c; } - /* Reverse the conversion done by add_char_buff() */ - /* For a multi-byte character get all the bytes and return the - * converted character. */ - if (has_mbyte && (c != K_SPECIAL || p[1] == KS_SPECIAL)) + // Reverse the conversion done by add_char_buff() */ + // For a multi-byte character get all the bytes and return the + // converted character. 
+ if (c != K_SPECIAL || p[1] == KS_SPECIAL) { n = MB_BYTE2LEN_CHECK(c); - else + } else { n = 1; - for (i = 0;; ++i) { - if (c == K_SPECIAL) { /* special key or escaped K_SPECIAL */ + } + for (i = 0;; i++) { + if (c == K_SPECIAL) { // special key or escaped K_SPECIAL c = TO_SPECIAL(p[1], p[2]); p += 2; } @@ -2161,14 +2160,11 @@ static int vgetorpeek(bool advance) col = vcol = curwin->w_wcol = 0; ptr = get_cursor_line_ptr(); while (col < curwin->w_cursor.col) { - if (!ascii_iswhite(ptr[col])) + if (!ascii_iswhite(ptr[col])) { curwin->w_wcol = vcol; - vcol += lbr_chartabsize(ptr, ptr + col, - (colnr_T)vcol); - if (has_mbyte) - col += (*mb_ptr2len)(ptr + col); - else - ++col; + } + vcol += lbr_chartabsize(ptr, ptr + col, (colnr_T)vcol); + col += utfc_ptr2len(ptr + col); } curwin->w_wrow = curwin->w_cline_row + curwin->w_wcol / curwin->w_width_inner; @@ -2813,33 +2809,23 @@ int buf_do_map(int maptype, MapArguments *args, int mode, bool is_abbrev, // Otherwise we won't be able to find the start of it in a // vi-compatible way. 
// - if (has_mbyte) { - int first, last; - int same = -1; - - first = vim_iswordp(lhs); - last = first; - p = lhs + (*mb_ptr2len)(lhs); - n = 1; - while (p < lhs + len) { - n++; // nr of (multi-byte) chars - last = vim_iswordp(p); // type of last char - if (same == -1 && last != first) { - same = n - 1; // count of same char type - } - p += (*mb_ptr2len)(p); - } - if (last && n > 2 && same >= 0 && same < n - 1) { - retval = 1; - goto theend; + int same = -1; + + const int first = vim_iswordp(lhs); + int last = first; + p = lhs + utfc_ptr2len(lhs); + n = 1; + while (p < lhs + len) { + n++; // nr of (multi-byte) chars + last = vim_iswordp(p); // type of last char + if (same == -1 && last != first) { + same = n - 1; // count of same char type } - } else if (vim_iswordc(lhs[len - 1])) { // ends in keyword char - for (n = 0; n < len - 2; n++) { - if (vim_iswordc(lhs[n]) != vim_iswordc(lhs[len - 2])) { - retval = 1; - goto theend; - } - } // for + p += (*mb_ptr2len)(p); + } + if (last && n > 2 && same >= 0 && same < n - 1) { + retval = 1; + goto theend; } // An abbreviation cannot contain white space. for (n = 0; n < len; n++) { @@ -3700,25 +3686,23 @@ int ExpandMappings(regmatch_T *regmatch, int *num_file, char_u ***file) return count == 0 ? FAIL : OK; } -/* - * Check for an abbreviation. - * Cursor is at ptr[col]. - * When inserting, mincol is where insert started. - * For the command line, mincol is what is to be skipped over. - * "c" is the character typed before check_abbr was called. It may have - * ABBR_OFF added to avoid prepending a CTRL-V to it. - * - * Historic vi practice: The last character of an abbreviation must be an id - * character ([a-zA-Z0-9_]). The characters in front of it must be all id - * characters or all non-id characters. This allows for abbr. "#i" to - * "#include". - * - * Vim addition: Allow for abbreviations that end in a non-keyword character. - * Then there must be white space before the abbr. 
- * - * return TRUE if there is an abbreviation, FALSE if not - */ -int check_abbr(int c, char_u *ptr, int col, int mincol) +// Check for an abbreviation. +// Cursor is at ptr[col]. +// When inserting, mincol is where insert started. +// For the command line, mincol is what is to be skipped over. +// "c" is the character typed before check_abbr was called. It may have +// ABBR_OFF added to avoid prepending a CTRL-V to it. +// +// Historic vi practice: The last character of an abbreviation must be an id +// character ([a-zA-Z0-9_]). The characters in front of it must be all id +// characters or all non-id characters. This allows for abbr. "#i" to +// "#include". +// +// Vim addition: Allow for abbreviations that end in a non-keyword character. +// Then there must be white space before the abbr. +// +// Return true if there is an abbreviation, false if not. +bool check_abbr(int c, char_u *ptr, int col, int mincol) { int len; int scol; /* starting column of the abbr. */ @@ -3727,36 +3711,36 @@ int check_abbr(int c, char_u *ptr, int col, int mincol) char_u tb[MB_MAXBYTES + 4]; mapblock_T *mp; mapblock_T *mp2; - int clen = 0; /* length in characters */ - int is_id = TRUE; - int vim_abbr; - - if (typebuf.tb_no_abbr_cnt) /* abbrev. are not recursive */ - return FALSE; + int clen = 0; // length in characters + bool is_id = true; - /* no remapping implies no abbreviation, except for CTRL-] */ - if ((KeyNoremap & (RM_NONE|RM_SCRIPT)) != 0 && c != Ctrl_RSB) - return FALSE; + if (typebuf.tb_no_abbr_cnt) { // abbrev. are not recursive + return false; + } - /* - * Check for word before the cursor: If it ends in a keyword char all - * chars before it must be keyword chars or non-keyword chars, but not - * white space. If it ends in a non-keyword char we accept any characters - * before it except white space. - */ - if (col == 0) /* cannot be an abbr. 
*/ - return FALSE; + // no remapping implies no abbreviation, except for CTRL-] + if ((KeyNoremap & (RM_NONE|RM_SCRIPT)) != 0 && c != Ctrl_RSB) { + return false; + } - if (has_mbyte) { - char_u *p; + // Check for word before the cursor: If it ends in a keyword char all + // chars before it must be keyword chars or non-keyword chars, but not + // white space. If it ends in a non-keyword char we accept any characters + // before it except white space. + if (col == 0) { // cannot be an abbr. + return false; + } - p = mb_prevptr(ptr, ptr + col); - if (!vim_iswordp(p)) - vim_abbr = TRUE; /* Vim added abbr. */ - else { - vim_abbr = FALSE; /* vi compatible abbr. */ - if (p > ptr) + { + bool vim_abbr; + char_u *p = mb_prevptr(ptr, ptr + col); + if (!vim_iswordp(p)) { + vim_abbr = true; // Vim added abbr. + } else { + vim_abbr = false; // vi compatible abbr. + if (p > ptr) { is_id = vim_iswordp(mb_prevptr(ptr, p)); + } } clen = 1; while (p > ptr + mincol) { @@ -3768,17 +3752,6 @@ int check_abbr(int c, char_u *ptr, int col, int mincol) ++clen; } scol = (int)(p - ptr); - } else { - if (!vim_iswordc(ptr[col - 1])) - vim_abbr = TRUE; /* Vim added abbr. */ - else { - vim_abbr = FALSE; /* vi compatible abbr. 
*/ - if (col > 1) - is_id = vim_iswordc(ptr[col - 2]); - } - for (scol = col - 1; scol > 0 && !ascii_isspace(ptr[scol - 1]) - && (vim_abbr || is_id == vim_iswordc(ptr[scol - 1])); --scol) - ; } if (scol < mincol) @@ -3866,14 +3839,14 @@ int check_abbr(int c, char_u *ptr, int col, int mincol) tb[0] = Ctrl_H; tb[1] = NUL; - if (has_mbyte) - len = clen; /* Delete characters instead of bytes */ - while (len-- > 0) /* delete the from string */ - (void)ins_typebuf(tb, 1, 0, TRUE, mp->m_silent); - return TRUE; + len = clen; // Delete characters instead of bytes + while (len-- > 0) { // delete the from string + (void)ins_typebuf(tb, 1, 0, true, mp->m_silent); + } + return true; } } - return FALSE; + return false; } /* diff --git a/src/nvim/memline.c b/src/nvim/memline.c index 57ed0d6588..70225484ec 100644 --- a/src/nvim/memline.c +++ b/src/nvim/memline.c @@ -4152,9 +4152,7 @@ void goto_byte(long cnt) check_cursor(); // Make sure the cursor is on the first byte of a multi-byte char. - if (has_mbyte) { - mb_adjust_cursor(); - } + mb_adjust_cursor(); } /// Increment the line pointer "lp" crossing line boundaries as necessary. diff --git a/src/nvim/message.c b/src/nvim/message.c index 06ba607323..f76a408481 100644 --- a/src/nvim/message.c +++ b/src/nvim/message.c @@ -400,12 +400,12 @@ void trunc_string(char_u *s, char_u *buf, int room_in, int buflen) } len += n; buf[e] = s[e]; - if (has_mbyte) - for (n = (*mb_ptr2len)(s + e); --n > 0; ) { - if (++e == buflen) - break; - buf[e] = s[e]; + for (n = utfc_ptr2len(s + e); --n > 0; ) { + if (++e == buflen) { + break; } + buf[e] = s[e]; + } } // Last part: End of the string. @@ -873,19 +873,17 @@ char_u *msg_may_trunc(int force, char_u *s) room = (int)(Rows - cmdline_row - 1) * Columns + sc_col - 1; if ((force || (shortmess(SHM_TRUNC) && !exmode_active)) && (n = (int)STRLEN(s) - room) > 0) { - if (has_mbyte) { - int size = vim_strsize(s); - - /* There may be room anyway when there are multibyte chars. 
*/ - if (size <= room) - return s; + int size = vim_strsize(s); - for (n = 0; size >= room; ) { - size -= utf_ptr2cells(s + n); - n += utfc_ptr2len(s + n); - } - --n; + // There may be room anyway when there are multibyte chars. + if (size <= room) { + return s; + } + for (n = 0; size >= room; ) { + size -= utf_ptr2cells(s + n); + n += utfc_ptr2len(s + n); } + n--; s += n; *s = '<'; } @@ -1430,7 +1428,7 @@ int msg_outtrans_len_attr(const char_u *msgstr, int len, int attr) // If the string starts with a composing character first draw a space on // which the composing char can be drawn. - if (enc_utf8 && utf_iscomposing(utf_ptr2char(msgstr))) { + if (utf_iscomposing(utf_ptr2char(msgstr))) { msg_puts_attr(" ", attr); } @@ -2489,8 +2487,9 @@ static void t_puts(int *t_col, const char_u *t_s, const char_u *s, int attr) *t_col = 0; /* If the string starts with a composing character don't increment the * column position for it. */ - if (enc_utf8 && utf_iscomposing(utf_ptr2char(t_s))) - --msg_col; + if (utf_iscomposing(utf_ptr2char(t_s))) { + msg_col--; + } if (msg_col >= Columns) { msg_col = 0; ++msg_row; @@ -3391,12 +3390,12 @@ do_dialog ( * Copy one character from "*from" to "*to", taking care of multi-byte * characters. Return the length of the character in bytes. */ -static int -copy_char ( - char_u *from, +static int copy_char( + const char_u *from, char_u *to, - int lowercase /* make character lower case */ + bool lowercase // make character lower case ) + FUNC_ATTR_NONNULL_ALL { if (lowercase) { int c = mb_tolower(utf_ptr2char(from)); @@ -3408,7 +3407,7 @@ copy_char ( } #define HAS_HOTKEY_LEN 30 -#define HOTK_LEN (has_mbyte ? MB_MAXBYTES : 1) +#define HOTK_LEN MB_MAXBYTES /// Allocates memory for dialog string & for storing hotkeys /// @@ -3512,7 +3511,7 @@ static void copy_hotkeys_and_msg(const char_u *message, char_u *buttons, // Define first default hotkey. Keep the hotkey string NUL // terminated to avoid reading past the end. 
- hotkeys_ptr[copy_char(buttons, hotkeys_ptr, TRUE)] = NUL; + hotkeys_ptr[copy_char(buttons, hotkeys_ptr, true)] = NUL; // Remember where the choices start, displaying starts here when // "hotkeys_ptr" typed at the more prompt. @@ -3532,8 +3531,8 @@ static void copy_hotkeys_and_msg(const char_u *message, char_u *buttons, *msgp++ = ' '; // '\n' -> ', ' // Advance to next hotkey and set default hotkey - hotkeys_ptr += (has_mbyte) ? STRLEN(hotkeys_ptr): 1; - hotkeys_ptr[copy_char(r + 1, hotkeys_ptr, TRUE)] = NUL; + hotkeys_ptr += STRLEN(hotkeys_ptr); + hotkeys_ptr[copy_char(r + 1, hotkeys_ptr, true)] = NUL; if (default_button_idx) { default_button_idx--; @@ -3555,15 +3554,15 @@ static void copy_hotkeys_and_msg(const char_u *message, char_u *buttons, } else { // '&a' -> '[a]' *msgp++ = (default_button_idx == 1) ? '[' : '('; - msgp += copy_char(r, msgp, FALSE); + msgp += copy_char(r, msgp, false); *msgp++ = (default_button_idx == 1) ? ']' : ')'; // redefine hotkey - hotkeys_ptr[copy_char(r, hotkeys_ptr, TRUE)] = NUL; + hotkeys_ptr[copy_char(r, hotkeys_ptr, true)] = NUL; } } else { // everything else copy literally - msgp += copy_char(r, msgp, FALSE); + msgp += copy_char(r, msgp, false); } // advance to the next character diff --git a/src/nvim/move.c b/src/nvim/move.c index ccd19a81de..fdcf6bb189 100644 --- a/src/nvim/move.c +++ b/src/nvim/move.c @@ -717,11 +717,9 @@ int curwin_col_off2(void) return win_col_off2(curwin); } -/* - * compute curwin->w_wcol and curwin->w_virtcol. - * Also updates curwin->w_wrow and curwin->w_cline_row. - * Also updates curwin->w_leftcol. - */ +// Compute curwin->w_wcol and curwin->w_virtcol. +// Also updates curwin->w_wrow and curwin->w_cline_row. +// Also updates curwin->w_leftcol. 
void curs_columns( int may_scroll /* when true, may scroll horizontally */ ) diff --git a/src/nvim/option.c b/src/nvim/option.c index fcc051ef1a..0a91687352 100644 --- a/src/nvim/option.c +++ b/src/nvim/option.c @@ -1725,14 +1725,15 @@ int do_set( #endif ) arg++; // remove backslash - if (has_mbyte - && (i = (*mb_ptr2len)(arg)) > 1) { + i = utfc_ptr2len(arg); + if (i > 1) { // copy multibyte char memmove(s, arg, (size_t)i); arg += i; s += i; - } else + } else { *s++ = *arg++; + } } *s = NUL; @@ -2864,39 +2865,26 @@ ambw_end: errmsg = e_invarg; } } else if (gvarp == &p_mps) { // 'matchpairs' - if (has_mbyte) { - for (p = *varp; *p != NUL; p++) { - int x2 = -1; - int x3 = -1; + for (p = *varp; *p != NUL; p++) { + int x2 = -1; + int x3 = -1; - if (*p != NUL) { - p += mb_ptr2len(p); - } - if (*p != NUL) { - x2 = *p++; - } - if (*p != NUL) { - x3 = utf_ptr2char(p); - p += mb_ptr2len(p); - } - if (x2 != ':' || x3 == -1 || (*p != NUL && *p != ',')) { - errmsg = e_invarg; - break; - } - if (*p == NUL) { - break; - } + if (*p != NUL) { + p += utfc_ptr2len(p); } - } else { - // Check for "x:y,x:y" - for (p = *varp; *p != NUL; p += 4) { - if (p[1] != ':' || p[2] == NUL || (p[3] != NUL && p[3] != ',')) { - errmsg = e_invarg; - break; - } - if (p[3] == NUL) { - break; - } + if (*p != NUL) { + x2 = *p++; + } + if (*p != NUL) { + x3 = utf_ptr2char(p); + p += utfc_ptr2len(p); + } + if (x2 != ':' || x3 == -1 || (*p != NUL && *p != ',')) { + errmsg = e_invarg; + break; + } + if (*p == NUL) { + break; } } } else if (gvarp == &p_com) { // 'comments' diff --git a/src/nvim/path.c b/src/nvim/path.c index 31318f6bea..793f917f06 100644 --- a/src/nvim/path.c +++ b/src/nvim/path.c @@ -260,13 +260,13 @@ char_u *shorten_dir(char_u *str) *d++ = *s; skip = false; } else if (!skip) { - *d++ = *s; /* copy next char */ - if (*s != '~' && *s != '.') /* and leading "~" and "." */ + *d++ = *s; // copy next char + if (*s != '~' && *s != '.') { // and leading "~" and "." 
skip = true; - if (has_mbyte) { - int l = mb_ptr2len(s); - while (--l > 0) - *d++ = *++s; + } + int l = utfc_ptr2len(s); + while (--l > 0) { + *d++ = *++s; } } } @@ -608,13 +608,10 @@ static size_t do_path_expand(garray_T *gap, const char_u *path, )) { e = p; } - if (has_mbyte) { - len = (size_t)(*mb_ptr2len)(path_end); - memcpy(p, path_end, len); - p += len; - path_end += len; - } else - *p++ = *path_end++; + len = (size_t)(utfc_ptr2len(path_end)); + memcpy(p, path_end, len); + p += len; + path_end += len; } e = p; *e = NUL; diff --git a/src/nvim/regexp.c b/src/nvim/regexp.c index 6316129c6a..1c88bd4ba4 100644 --- a/src/nvim/regexp.c +++ b/src/nvim/regexp.c @@ -797,8 +797,7 @@ static int get_equi_class(char_u **pp) */ static void reg_equi_class(int c) { - if (enc_utf8 || STRCMP(p_enc, "latin1") == 0 - || STRCMP(p_enc, "iso-8859-15") == 0) { + { switch (c) { // Do not use '\300' style, it results in a negative number. case 'A': case 0xc0: case 0xc1: case 0xc2: @@ -1141,7 +1140,7 @@ static char_u *skip_anyof(char_u *p) if (*p == ']' || *p == '-') ++p; while (*p != NUL && *p != ']') { - if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1) { + if ((l = (*mb_ptr2len)(p)) > 1) { p += l; } else if (*p == '-') { p++; @@ -1876,7 +1875,7 @@ static char_u *regatom(int *flagp) EMSG_RET_NULL(_("E63: invalid use of \\_")); /* When '.' is followed by a composing char ignore the dot, so that * the composing char is matched here. 
*/ - if (enc_utf8 && c == Magic('.') && utf_iscomposing(peekchr())) { + if (c == Magic('.') && utf_iscomposing(peekchr())) { c = getchr(); goto do_multibyte; } @@ -2242,11 +2241,7 @@ collection: if (*regparse == '[') endc = get_coll_element(&regparse); if (endc == 0) { - if (has_mbyte) { - endc = mb_ptr2char_adv((const char_u **)&regparse); - } else { - endc = *regparse++; - } + endc = mb_ptr2char_adv((const char_u **)&regparse); } /* Handle \o40, \x20 and \u20AC style sequences */ @@ -2256,8 +2251,8 @@ collection: if (startc > endc) { EMSG_RET_NULL(_(e_reverse_range)); } - if (has_mbyte && ((*mb_char2len)(startc) > 1 - || (*mb_char2len)(endc) > 1)) { + if ((*mb_char2len)(startc) > 1 + || (*mb_char2len)(endc) > 1) { // Limit to a range of 256 chars if (endc > startc + 256) { EMSG_RET_NULL(_(e_large_class)); @@ -2502,9 +2497,9 @@ do_multibyte: && !one_exactly && !is_Magic(c))); ++len) { c = no_Magic(c); - if (has_mbyte) { + { regmbc(c); - if (enc_utf8) { + { int l; /* Need to get composing character too. */ @@ -2516,8 +2511,7 @@ do_multibyte: skipchr(); } } - } else - regc(c); + } c = getchr(); } ungetchr(); @@ -4248,15 +4242,13 @@ static bool regmatch( opnd = OPERAND(scan); // Inline the first byte, for speed. if (*opnd != *rex.input - && (!rex.reg_ic - || (!enc_utf8 - && mb_tolower(*opnd) != mb_tolower(*rex.input)))) { + && (!rex.reg_ic)) { status = RA_NOMATCH; } else if (*opnd == NUL) { // match empty string always works; happens when "~" is // empty. } else { - if (opnd[1] == NUL && !(enc_utf8 && rex.reg_ic)) { + if (opnd[1] == NUL && !rex.reg_ic) { len = 1; // matched a single byte above } else { // Need to match first byte again for multi-byte. @@ -4267,7 +4259,7 @@ static bool regmatch( } // Check for following composing character, unless %C // follows (skips over all composing chars). 
- if (status != RA_NOMATCH && enc_utf8 + if (status != RA_NOMATCH && UTF_COMPOSINGLIKE(rex.input, rex.input + len) && !rex.reg_icombine && OP(next) != RE_COMPOSING) { @@ -4336,7 +4328,7 @@ static bool regmatch( break; case RE_COMPOSING: - if (enc_utf8) { + { // Skip composing characters. while (utf_iscomposing(utf_ptr2char(rex.input))) { MB_CPTR_ADV(rex.input); @@ -5366,9 +5358,10 @@ do_class: if (got_int) { break; } - } else if (has_mbyte && (l = (*mb_ptr2len)(scan)) > 1) { - if (testval != 0) + } else if ((l = (*mb_ptr2len)(scan)) > 1) { + if (testval != 0) { break; + } scan += l; } else if ((class_tab[*scan] & mask) == testval) { scan++; @@ -5481,7 +5474,7 @@ do_class: /* Safety check (just in case 'encoding' was changed since * compiling the program). */ if ((len = (*mb_ptr2len)(opnd)) > 1) { - if (rex.reg_ic && enc_utf8) { + if (rex.reg_ic) { cf = utf_fold(utf_ptr2char(opnd)); } while (count < maxcount && (*mb_ptr2len)(scan) >= len) { @@ -5490,7 +5483,7 @@ do_class: break; } } - if (i < len && (!rex.reg_ic || !enc_utf8 + if (i < len && (!rex.reg_ic || utf_fold(utf_ptr2char(scan)) != cf)) { break; } @@ -6383,7 +6376,7 @@ static int cstrncmp(char_u *s1, char_u *s2, int *n) } // if it failed and it's utf8 and we want to combineignore: - if (result != 0 && enc_utf8 && rex.reg_icombine) { + if (result != 0 && rex.reg_icombine) { char_u *str1, *str2; int c1, c2, c11, c12; int junk; @@ -6501,10 +6494,10 @@ char_u *regtilde(char_u *source, int magic) STRMOVE(p, p + 2); /* remove '\~' */ --p; } else { - if (*p == '\\' && p[1]) /* skip escaped characters */ - ++p; - if (has_mbyte) - p += (*mb_ptr2len)(p) - 1; + if (*p == '\\' && p[1]) { // skip escaped characters + p++; + } + p += (*mb_ptr2len)(p) - 1; } } @@ -6940,7 +6933,7 @@ static int vim_regsub_both(char_u *source, typval_T *expr, char_u *dest, else /* just copy */ cc = c; - if (has_mbyte) { + { int l; // Copy composing characters separately, one @@ -6953,8 +6946,6 @@ static int vim_regsub_both(char_u *source, 
typval_T *expr, char_u *dest, utf_char2bytes(cc, dst); } dst += utf_char2len(cc) - 1; - } else if (copy) { - *dst = cc; } dst++; } diff --git a/src/nvim/regexp_nfa.c b/src/nvim/regexp_nfa.c index 7cd1ae93d2..7dfd16fb4f 100644 --- a/src/nvim/regexp_nfa.c +++ b/src/nvim/regexp_nfa.c @@ -704,8 +704,7 @@ static void nfa_emit_equi_class(int c) #define EMIT2(c) EMIT(c); EMIT(NFA_CONCAT); #define EMITMBC(c) EMIT(c); EMIT(NFA_CONCAT); - if (enc_utf8 || STRCMP(p_enc, "latin1") == 0 - || STRCMP(p_enc, "iso-8859-15") == 0) { + { #define A_grave 0xc0 #define A_acute 0xc1 #define A_circumflex 0xc2 @@ -1246,7 +1245,7 @@ static int nfa_regatom(void) } // When '.' is followed by a composing char ignore the dot, so that // the composing char is matched here. - if (enc_utf8 && c == Magic('.') && utf_iscomposing(peekchr())) { + if (c == Magic('.') && utf_iscomposing(peekchr())) { old_regparse = regparse; c = getchr(); goto nfa_do_multibyte; @@ -1737,11 +1736,10 @@ collection: EMIT(endc); EMIT(NFA_RANGE); EMIT(NFA_CONCAT); - } else if (has_mbyte && ((*mb_char2len)(startc) > 1 - || (*mb_char2len)(endc) > 1)) { - /* Emit the characters in the range. - * "startc" was already emitted, so skip it. - * */ + } else if ((*mb_char2len)(startc) > 1 + || (*mb_char2len)(endc) > 1) { + // Emit the characters in the range. + // "startc" was already emitted, so skip it. 
for (c = startc + 1; c <= endc; c++) { EMIT(c); EMIT(NFA_CONCAT); @@ -1819,9 +1817,8 @@ collection: nfa_do_multibyte: // plen is length of current char with composing chars - if (enc_utf8 && ((*mb_char2len)(c) - != (plen = utfc_ptr2len(old_regparse)) - || utf_iscomposing(c))) { + if ((*mb_char2len)(c) != (plen = utfc_ptr2len(old_regparse)) + || utf_iscomposing(c)) { int i = 0; /* A base character plus composing characters, or just one @@ -4995,7 +4992,7 @@ static long find_match_text(colnr_T startcol, int regstart, char_u *match_text) } if (match // check that no composing char follows - && !(enc_utf8 && utf_iscomposing(PTR2CHAR(s2)))) { + && !utf_iscomposing(PTR2CHAR(s2))) { cleanup_subexpr(); if (REG_MULTI) { rex.reg_startpos[0].lnum = rex.lnum; @@ -5248,7 +5245,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, { // If the match ends before a composing characters and // rex.reg_icombine is not set, that is not really a match. - if (enc_utf8 && !rex.reg_icombine && utf_iscomposing(curc)) { + if (!rex.reg_icombine && utf_iscomposing(curc)) { break; } nfa_match = true; @@ -5747,7 +5744,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, case NFA_ANY_COMPOSING: // On a composing character skip over it. Otherwise do // nothing. Always matches. - if (enc_utf8 && utf_iscomposing(curc)) { + if (utf_iscomposing(curc)) { add_off = clen; } else { add_here = true; @@ -6019,7 +6016,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, // Bail out quickly when there can't be a match, avoid the overhead of // win_linetabsize() on long lines. - if (op != 1 && col > t->state->val * (has_mbyte ? MB_MAXBYTES : 1)) { + if (op != 1 && col > t->state->val * MB_MAXBYTES) { break; } @@ -6132,7 +6129,7 @@ static int nfa_regmatch(nfa_regprog_T *prog, nfa_state_T *start, // If rex.reg_icombine is not set only skip over the character // itself. When it is set skip over composing characters. 
- if (result && enc_utf8 && !rex.reg_icombine) { + if (result && !rex.reg_icombine) { clen = utf_ptr2len(rex.input); } diff --git a/src/nvim/search.c b/src/nvim/search.c index b25333c9fa..f979889540 100644 --- a/src/nvim/search.c +++ b/src/nvim/search.c @@ -218,14 +218,11 @@ char_u *reverse_text(char_u *s) FUNC_ATTR_NONNULL_RET size_t len = STRLEN(s); char_u *rev = xmalloc(len + 1); size_t rev_i = len; - for (size_t s_i = 0; s_i < len; ++s_i) { - if (has_mbyte) { - int mb_len = (*mb_ptr2len)(s + s_i); - rev_i -= mb_len; - memmove(rev + rev_i, s + s_i, mb_len); - s_i += mb_len - 1; - } else - rev[--rev_i] = s[s_i]; + for (size_t s_i = 0; s_i < len; s_i++) { + const int mb_len = utfc_ptr2len(s + s_i); + rev_i -= mb_len; + memmove(rev + rev_i, s + s_i, mb_len); + s_i += mb_len - 1; } rev[len] = NUL; @@ -594,8 +591,8 @@ int searchit( // is zero. if (pos->col == MAXCOL) { start_char_len = 0; - } else if (has_mbyte - && pos->lnum >= 1 && pos->lnum <= buf->b_ml.ml_line_count + } else if (pos->lnum >= 1 + && pos->lnum <= buf->b_ml.ml_line_count && pos->col < MAXCOL - 2) { // Watch out for the "col" being MAXCOL - 2, used in a closed fold. 
ptr = ml_get_buf(buf, pos->lnum, false); @@ -1553,34 +1550,26 @@ int searchc(cmdarg_T *cap, int t_cmd) len = (int)STRLEN(p); while (count--) { - if (has_mbyte) { - for (;; ) { - if (dir > 0) { - col += (*mb_ptr2len)(p + col); - if (col >= len) - return FAIL; - } else { - if (col == 0) - return FAIL; - col -= utf_head_off(p, p + col - 1) + 1; + for (;; ) { + if (dir > 0) { + col += utfc_ptr2len(p + col); + if (col >= len) { + return FAIL; } - if (lastc_bytelen == 1) { - if (p[col] == c && stop) { - break; - } - } else if (STRNCMP(p + col, lastc_bytes, lastc_bytelen) == 0 && stop) { - break; + } else { + if (col == 0) { + return FAIL; } - stop = true; + col -= utf_head_off(p, p + col - 1) + 1; } - } else { - for (;; ) { - if ((col += dir) < 0 || col >= len) - return FAIL; - if (p[col] == c && stop) + if (lastc_bytelen == 1) { + if (p[col] == c && stop) { break; - stop = TRUE; + } + } else if (STRNCMP(p + col, lastc_bytes, lastc_bytelen) == 0 && stop) { + break; } + stop = true; } } @@ -1964,10 +1953,7 @@ pos_T *findmatchlimit(oparg_T *oap, int initc, int flags, int64_t maxtravel) if (lisp) /* find comment pos in new line */ comment_col = check_linecomment(linep); } else { - if (has_mbyte) - pos.col += (*mb_ptr2len)(linep + pos.col); - else - ++pos.col; + pos.col += utfc_ptr2len(linep + pos.col); } } diff --git a/src/nvim/spell.c b/src/nvim/spell.c index 636c71657d..797fe41320 100644 --- a/src/nvim/spell.c +++ b/src/nvim/spell.c @@ -513,10 +513,7 @@ size_t spell_check( } } - if (has_mbyte) { - return (size_t)(*mb_ptr2len)(ptr); - } - return 1; + return (size_t)(utfc_ptr2len(ptr)); } else if (mi.mi_end == ptr) { // Always include at least one character. Required for when there // is a mixup in "midword". @@ -722,7 +719,7 @@ static void find_word(matchinf_T *mip, int mode) // has been found we try compound flags. 
bool prefix_found = false; - if (mode != FIND_KEEPWORD && has_mbyte) { + if (mode != FIND_KEEPWORD) { // Compute byte length in original word, length may change // when folding case. This can be slow, take a shortcut when the // case-folded word is equal to the keep-case word. @@ -796,11 +793,11 @@ static void find_word(matchinf_T *mip, int mode) continue; // For multi-byte chars check character length against // COMPOUNDMIN. - if (has_mbyte - && slang->sl_compminlen > 0 + if (slang->sl_compminlen > 0 && mb_charlen_len(mip->mi_word + mip->mi_compoff, - wlen - mip->mi_compoff) < slang->sl_compminlen) + wlen - mip->mi_compoff) < slang->sl_compminlen) { continue; + } // Limit the number of compound words to COMPOUNDWORDMAX if no // maximum for syllables is specified. @@ -833,8 +830,7 @@ static void find_word(matchinf_T *mip, int mode) // Need to check the caps type of the appended compound // word. - if (has_mbyte && STRNCMP(ptr, mip->mi_word, - mip->mi_compoff) != 0) { + if (STRNCMP(ptr, mip->mi_word, mip->mi_compoff) != 0) { // case folding may have changed the length p = mip->mi_word; for (char_u *s = ptr; s < ptr + mip->mi_compoff; MB_PTR_ADV(s)) { @@ -907,7 +903,7 @@ static void find_word(matchinf_T *mip, int mode) // Find following word in case-folded tree. mip->mi_compoff = endlen[endidxcnt]; - if (has_mbyte && mode == FIND_KEEPWORD) { + if (mode == FIND_KEEPWORD) { // Compute byte length in case-folded word from "wlen": // byte length in keep-case word. Length may change when // folding case. This can be slow, take a shortcut when @@ -1260,12 +1256,9 @@ static void find_prefix(matchinf_T *mip, int mode) // Skip over the previously found word(s). mip->mi_prefixlen += mip->mi_compoff; - if (has_mbyte) { - // Case-folded length may differ from original length. - mip->mi_cprefixlen = nofold_len(mip->mi_fword, - mip->mi_prefixlen, mip->mi_word); - } else - mip->mi_cprefixlen = mip->mi_prefixlen; + // Case-folded length may differ from original length. 
+ mip->mi_cprefixlen = nofold_len(mip->mi_fword, mip->mi_prefixlen, + mip->mi_word); find_word(mip, FIND_PREFIX); @@ -2272,35 +2265,30 @@ static void clear_midword(win_T *wp) // Use the "sl_midword" field of language "lp" for buffer "buf". // They add up to any currently used midword characters. static void use_midword(slang_T *lp, win_T *wp) + FUNC_ATTR_NONNULL_ALL { - char_u *p; - - if (lp->sl_midword == NULL) // there aren't any + if (lp->sl_midword == NULL) { // there aren't any return; + } - for (p = lp->sl_midword; *p != NUL; ) - if (has_mbyte) { - int c, l, n; - char_u *bp; - - c = utf_ptr2char(p); - l = (*mb_ptr2len)(p); - if (c < 256 && l <= 2) - wp->w_s->b_spell_ismw[c] = true; - else if (wp->w_s->b_spell_ismw_mb == NULL) - // First multi-byte char in "b_spell_ismw_mb". - wp->w_s->b_spell_ismw_mb = vim_strnsave(p, l); - else { - // Append multi-byte chars to "b_spell_ismw_mb". - n = (int)STRLEN(wp->w_s->b_spell_ismw_mb); - bp = vim_strnsave(wp->w_s->b_spell_ismw_mb, n + l); - xfree(wp->w_s->b_spell_ismw_mb); - wp->w_s->b_spell_ismw_mb = bp; - STRLCPY(bp + n, p, l + 1); - } - p += l; - } else - wp->w_s->b_spell_ismw[*p++] = true; + for (char_u *p = lp->sl_midword; *p != NUL; ) { + const int c = utf_ptr2char(p); + const int l = utfc_ptr2len(p); + if (c < 256 && l <= 2) { + wp->w_s->b_spell_ismw[c] = true; + } else if (wp->w_s->b_spell_ismw_mb == NULL) { + // First multi-byte char in "b_spell_ismw_mb". + wp->w_s->b_spell_ismw_mb = vim_strnsave(p, l); + } else { + // Append multi-byte chars to "b_spell_ismw_mb". + const int n = (int)STRLEN(wp->w_s->b_spell_ismw_mb); + char_u *bp = vim_strnsave(wp->w_s->b_spell_ismw_mb, n + l); + xfree(wp->w_s->b_spell_ismw_mb); + wp->w_s->b_spell_ismw_mb = bp; + STRLCPY(bp + n, p, l + 1); + } + p += l; + } } // Find the region "region[2]" in "rp" (points to "sl_regions"). 
@@ -2333,7 +2321,6 @@ int captype(char_u *word, char_u *end) FUNC_ATTR_NONNULL_ARG(1) { char_u *p; - int c; int firstcap; bool allcap; bool past_second = false; // past second word char @@ -2344,11 +2331,7 @@ int captype(char_u *word, char_u *end) return 0; // only non-word characters, illegal word } } - if (has_mbyte) { - c = mb_ptr2char_adv((const char_u **)&p); - } else { - c = *p++; - } + int c = mb_ptr2char_adv((const char_u **)&p); firstcap = allcap = SPELL_ISUPPER(c); // Need to check all letters to find a word with mixed upper/lower. @@ -2673,34 +2656,23 @@ static bool spell_iswordp_w(const int *p, const win_T *wp) // Returns FAIL when something wrong. int spell_casefold(char_u *str, int len, char_u *buf, int buflen) { - int i; - if (len >= buflen) { buf[0] = NUL; return FAIL; // result will not fit } - if (has_mbyte) { - int outi = 0; - char_u *p; - int c; + int outi = 0; - // Fold one character at a time. - for (p = str; p < str + len; ) { - if (outi + MB_MAXBYTES > buflen) { - buf[outi] = NUL; - return FAIL; - } - c = mb_cptr2char_adv((const char_u **)&p); - outi += utf_char2bytes(SPELL_TOFOLD(c), buf + outi); + // Fold one character at a time. + for (char_u *p = str; p < str + len; ) { + if (outi + MB_MAXBYTES > buflen) { + buf[outi] = NUL; + return FAIL; } - buf[outi] = NUL; - } else { - // Be quick for non-multibyte encodings. 
- for (i = 0; i < len; ++i) - buf[i] = spelltab.st_fold[str[i]]; - buf[i] = NUL; + const int c = mb_cptr2char_adv((const char_u **)&p); + outi += utf_char2bytes(SPELL_TOFOLD(c), buf + outi); } + buf[outi] = NUL; return OK; } @@ -3428,22 +3400,14 @@ static void spell_find_cleanup(suginfo_T *su) /// @param[in] upper True to upper case, otherwise lower case void onecap_copy(char_u *word, char_u *wcopy, bool upper) { - char_u *p; - int c; - int l; - - p = word; - if (has_mbyte) { - c = mb_cptr2char_adv((const char_u **)&p); - } else { - c = *p++; - } + char_u *p = word; + int c = mb_cptr2char_adv((const char_u **)&p); if (upper) { c = SPELL_TOUPPER(c); } else { c = SPELL_TOFOLD(c); } - l = utf_char2bytes(c, wcopy); + int l = utf_char2bytes(c, wcopy); STRLCPY(wcopy + l, p, MAXWLEN - l); } @@ -3451,17 +3415,9 @@ void onecap_copy(char_u *word, char_u *wcopy, bool upper) // "wcopy[MAXWLEN]". The result is NUL terminated. static void allcap_copy(char_u *word, char_u *wcopy) { - char_u *s; - char_u *d; - int c; - - d = wcopy; - for (s = word; *s != NUL; ) { - if (has_mbyte) { - c = mb_cptr2char_adv((const char_u **)&s); - } else { - c = *s++; - } + char_u *d = wcopy; + for (char_u *s = word; *s != NUL; ) { + int c = mb_cptr2char_adv((const char_u **)&s); if (c == 0xdf) { c = 'S'; @@ -3730,10 +3686,7 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, bool so if (byts[arridx] == 0 || n == (int)STATE_NOPREFIX) { // Set su->su_badflags to the caps type at this position. // Use the caps type until here for the prefix itself. - if (has_mbyte) - n = nofold_len(fword, sp->ts_fidx, su->su_badptr); - else - n = sp->ts_fidx; + n = nofold_len(fword, sp->ts_fidx, su->su_badptr); flags = badword_captype(su->su_badptr, su->su_badptr + n); su->su_badflags = badword_captype(su->su_badptr + n, su->su_badptr + su->su_badlen); @@ -3851,15 +3804,16 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, bool so // flag). 
if (((unsigned)flags >> 24) == 0 || sp->ts_twordlen - sp->ts_splitoff - < slang->sl_compminlen) + < slang->sl_compminlen) { break; + } // For multi-byte chars check character length against // COMPOUNDMIN. - if (has_mbyte - && slang->sl_compminlen > 0 + if (slang->sl_compminlen > 0 && mb_charlen(tword + sp->ts_splitoff) - < slang->sl_compminlen) + < slang->sl_compminlen) { break; + } compflags[sp->ts_complen] = ((unsigned)flags >> 24); compflags[sp->ts_complen + 1] = NUL; @@ -4014,7 +3968,7 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, bool so // Try word split and/or compounding. if ((sp->ts_fidx >= sp->ts_fidxtry || fword_ends) // Don't split in the middle of a character - && (!has_mbyte || sp->ts_tcharlen == 0) + && (sp->ts_tcharlen == 0) ) { bool try_compound; int try_split; @@ -4046,8 +4000,7 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, bool so && ((unsigned)flags >> 24) != 0 && sp->ts_twordlen - sp->ts_splitoff >= slang->sl_compminlen - && (!has_mbyte - || slang->sl_compminlen == 0 + && (slang->sl_compminlen == 0 || mb_charlen(tword + sp->ts_splitoff) >= slang->sl_compminlen) && (slang->sl_compsylmax < MAXWLEN @@ -4166,10 +4119,7 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, bool so // set su->su_badflags to the caps type at this // position - if (has_mbyte) - n = nofold_len(fword, sp->ts_fidx, su->su_badptr); - else - n = sp->ts_fidx; + n = nofold_len(fword, sp->ts_fidx, su->su_badptr); su->su_badflags = badword_captype(su->su_badptr + n, su->su_badptr + su->su_badlen); @@ -4266,84 +4216,74 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, bool so ++sp->ts_fidx; tword[sp->ts_twordlen++] = c; sp->ts_arridx = idxs[arridx]; - if (newscore == SCORE_SUBST) + if (newscore == SCORE_SUBST) { sp->ts_isdiff = DIFF_YES; - if (has_mbyte) { - // Multi-byte characters are a bit complicated to - // handle: They differ when any of the bytes differ - // and 
then their length may also differ. - if (sp->ts_tcharlen == 0) { - // First byte. - sp->ts_tcharidx = 0; - sp->ts_tcharlen = MB_BYTE2LEN(c); - sp->ts_fcharstart = sp->ts_fidx - 1; - sp->ts_isdiff = (newscore != 0) - ? DIFF_YES : DIFF_NONE; - } else if (sp->ts_isdiff == DIFF_INSERT) - // When inserting trail bytes don't advance in the - // bad word. - --sp->ts_fidx; - if (++sp->ts_tcharidx == sp->ts_tcharlen) { - // Last byte of character. - if (sp->ts_isdiff == DIFF_YES) { - // Correct ts_fidx for the byte length of the - // character (we didn't check that before). - sp->ts_fidx = sp->ts_fcharstart - + utfc_ptr2len(fword + sp->ts_fcharstart); - - // For changing a composing character adjust - // the score from SCORE_SUBST to - // SCORE_SUBCOMP. - if (utf_iscomposing(utf_ptr2char(tword + sp->ts_twordlen - - sp->ts_tcharlen)) - && utf_iscomposing(utf_ptr2char(fword - + sp->ts_fcharstart))) { - sp->ts_score -= SCORE_SUBST - SCORE_SUBCOMP; - } else if ( - !soundfold - && slang->sl_has_map - && similar_chars( - slang, - utf_ptr2char(tword + sp->ts_twordlen - sp->ts_tcharlen), - utf_ptr2char(fword + sp->ts_fcharstart))) { - // For a similar character adjust score from - // SCORE_SUBST to SCORE_SIMILAR. - sp->ts_score -= SCORE_SUBST - SCORE_SIMILAR; - } - } else if (sp->ts_isdiff == DIFF_INSERT - && sp->ts_twordlen > sp->ts_tcharlen) { - p = tword + sp->ts_twordlen - sp->ts_tcharlen; - c = utf_ptr2char(p); - if (utf_iscomposing(c)) { - // Inserting a composing char doesn't - // count that much. - sp->ts_score -= SCORE_INS - SCORE_INSCOMP; - } else { - // If the previous character was the same, - // thus doubling a character, give a bonus - // to the score. Also for the soundfold - // tree (might seem illogical but does - // give better scores). 
- MB_PTR_BACK(tword, p); - if (c == utf_ptr2char(p)) { - sp->ts_score -= SCORE_INS - SCORE_INSDUP; - } + } + // Multi-byte characters are a bit complicated to + // handle: They differ when any of the bytes differ + // and then their length may also differ. + if (sp->ts_tcharlen == 0) { + // First byte. + sp->ts_tcharidx = 0; + sp->ts_tcharlen = MB_BYTE2LEN(c); + sp->ts_fcharstart = sp->ts_fidx - 1; + sp->ts_isdiff = (newscore != 0) + ? DIFF_YES : DIFF_NONE; + } else if (sp->ts_isdiff == DIFF_INSERT) { + // When inserting trail bytes don't advance in the + // bad word. + sp->ts_fidx--; + } + if (++sp->ts_tcharidx == sp->ts_tcharlen) { + // Last byte of character. + if (sp->ts_isdiff == DIFF_YES) { + // Correct ts_fidx for the byte length of the + // character (we didn't check that before). + sp->ts_fidx = sp->ts_fcharstart + + utfc_ptr2len(fword + sp->ts_fcharstart); + + // For changing a composing character adjust + // the score from SCORE_SUBST to + // SCORE_SUBCOMP. + if (utf_iscomposing(utf_ptr2char(tword + sp->ts_twordlen + - sp->ts_tcharlen)) + && utf_iscomposing(utf_ptr2char(fword + + sp->ts_fcharstart))) { + sp->ts_score -= SCORE_SUBST - SCORE_SUBCOMP; + } else if ( + !soundfold + && slang->sl_has_map + && similar_chars( + slang, + utf_ptr2char(tword + sp->ts_twordlen - sp->ts_tcharlen), + utf_ptr2char(fword + sp->ts_fcharstart))) { + // For a similar character adjust score from + // SCORE_SUBST to SCORE_SIMILAR. + sp->ts_score -= SCORE_SUBST - SCORE_SIMILAR; + } + } else if (sp->ts_isdiff == DIFF_INSERT + && sp->ts_twordlen > sp->ts_tcharlen) { + p = tword + sp->ts_twordlen - sp->ts_tcharlen; + c = utf_ptr2char(p); + if (utf_iscomposing(c)) { + // Inserting a composing char doesn't + // count that much. + sp->ts_score -= SCORE_INS - SCORE_INSCOMP; + } else { + // If the previous character was the same, + // thus doubling a character, give a bonus + // to the score. Also for the soundfold + // tree (might seem illogical but does + // give better scores). 
+ MB_PTR_BACK(tword, p); + if (c == utf_ptr2char(p)) { + sp->ts_score -= SCORE_INS - SCORE_INSDUP; } } - - // Starting a new char, reset the length. - sp->ts_tcharlen = 0; } - } else { - // If we found a similar char adjust the score. - // We do this after calling go_deeper() because - // it's slow. - if (newscore != 0 - && !soundfold - && slang->sl_has_map - && similar_chars(slang, - c, fword[sp->ts_fidx - 1])) - sp->ts_score -= SCORE_SUBST - SCORE_SIMILAR; + + // Starting a new char, reset the length. + sp->ts_tcharlen = 0; } } } @@ -4352,7 +4292,7 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, bool so case STATE_DEL: // When past the first byte of a multi-byte char don't try // delete/insert/swap a character. - if (has_mbyte && sp->ts_tcharlen > 0) { + if (sp->ts_tcharlen > 0) { PROF_STORE(sp->ts_state) sp->ts_state = STATE_FINAL; break; @@ -4461,18 +4401,15 @@ static void suggest_trie_walk(suginfo_T *su, langp_T *lp, char_u *fword, bool so sp = &stack[depth]; tword[sp->ts_twordlen++] = c; sp->ts_arridx = idxs[n]; - if (has_mbyte) { - fl = MB_BYTE2LEN(c); - if (fl > 1) { - // There are following bytes for the same character. - // We must find all bytes before trying - // delete/insert/swap/etc. - sp->ts_tcharlen = fl; - sp->ts_tcharidx = 1; - sp->ts_isdiff = DIFF_INSERT; - } - } else - fl = 1; + fl = MB_BYTE2LEN(c); + if (fl > 1) { + // There are following bytes for the same character. + // We must find all bytes before trying + // delete/insert/swap/etc. + sp->ts_tcharlen = fl; + sp->ts_tcharidx = 1; + sp->ts_isdiff = DIFF_INSERT; + } if (fl == 1) { // If the previous character was the same, thus doubling a // character, give a bonus to the score. Also for @@ -4914,12 +4851,8 @@ static void find_keepcap_word(slang_T *slang, char_u *fword, char_u *kword) } else { // round[depth] == 1: Try using the folded-case character. // round[depth] == 2: Try using the upper-case character. 
- if (has_mbyte) { - flen = MB_CPTR2LEN(fword + fwordidx[depth]); - ulen = MB_CPTR2LEN(uword + uwordidx[depth]); - } else { - ulen = flen = 1; - } + flen = MB_CPTR2LEN(fword + fwordidx[depth]); + ulen = MB_CPTR2LEN(uword + uwordidx[depth]); if (round[depth] == 1) { p = fword + fwordidx[depth]; l = flen; @@ -5872,57 +5805,43 @@ void spell_soundfold(slang_T *slang, char_u *inword, bool folded, char_u *res) // SOFOTO lines. static void spell_soundfold_sofo(slang_T *slang, char_u *inword, char_u *res) { - char_u *s; int ri = 0; - int c; - if (has_mbyte) { - int prevc = 0; - int *ip; + int prevc = 0; - // The sl_sal_first[] table contains the translation for chars up to - // 255, sl_sal the rest. - for (s = inword; *s != NUL; ) { - c = mb_cptr2char_adv((const char_u **)&s); - if (utf_class(c) == 0) { - c = ' '; - } else if (c < 256) { - c = slang->sl_sal_first[c]; + // The sl_sal_first[] table contains the translation for chars up to + // 255, sl_sal the rest. + for (char_u *s = inword; *s != NUL; ) { + int c = mb_cptr2char_adv((const char_u **)&s); + if (utf_class(c) == 0) { + c = ' '; + } else if (c < 256) { + c = slang->sl_sal_first[c]; + } else { + int *ip = ((int **)slang->sl_sal.ga_data)[c & 0xff]; + if (ip == NULL) { // empty list, can't match + c = NUL; } else { - ip = ((int **)slang->sl_sal.ga_data)[c & 0xff]; - if (ip == NULL) // empty list, can't match - c = NUL; - else - for (;; ) { // find "c" in the list - if (*ip == 0) { // not found - c = NUL; - break; - } - if (*ip == c) { // match! - c = ip[1]; - break; - } - ip += 2; + for (;; ) { // find "c" in the list + if (*ip == 0) { // not found + c = NUL; + break; } - } - - if (c != NUL && c != prevc) { - ri += utf_char2bytes(c, res + ri); - if (ri + MB_MAXBYTES > MAXWLEN) { - break; + if (*ip == c) { // match! + c = ip[1]; + break; + } + ip += 2; } - prevc = c; } } - } else { - // The sl_sal_first[] table contains the translation. 
- for (s = inword; (c = *s) != NUL; ++s) { - if (ascii_iswhite(c)) - c = ' '; - else - c = slang->sl_sal_first[c]; - if (c != NUL && (ri == 0 || res[ri - 1] != c)) - res[ri++] = c; + + if (c != NUL && c != prevc) { + ri += utf_char2bytes(c, res + ri); + if (ri + MB_MAXBYTES > MAXWLEN) { + break; + } + prevc = c; } } @@ -6425,12 +6344,11 @@ static int spell_edit_score(slang_T *slang, char_u *badword, char_u *goodword) int pbc, pgc; int wbadword[MAXWLEN]; int wgoodword[MAXWLEN]; - const bool l_has_mbyte = has_mbyte; // Lengths with NUL. int badlen; int goodlen; - if (l_has_mbyte) { + { // Get the characters from the multi-byte strings and put them in an // int array for easy access. badlen = 0; @@ -6443,9 +6361,6 @@ static int spell_edit_score(slang_T *slang, char_u *badword, char_u *goodword) wgoodword[goodlen++] = mb_cptr2char_adv(&p); } wgoodword[goodlen++] = 0; - } else { - badlen = (int)STRLEN(badword) + 1; - goodlen = (int)STRLEN(goodword) + 1; } // We use "cnt" as an array: CNT(badword_idx, goodword_idx). @@ -6458,17 +6373,12 @@ static int spell_edit_score(slang_T *slang, char_u *badword, char_u *goodword) for (i = 1; i <= badlen; ++i) { CNT(i, 0) = CNT(i - 1, 0) + SCORE_DEL; - for (j = 1; j <= goodlen; ++j) { - if (l_has_mbyte) { - bc = wbadword[i - 1]; - gc = wgoodword[j - 1]; - } else { - bc = badword[i - 1]; - gc = goodword[j - 1]; - } - if (bc == gc) + for (j = 1; j <= goodlen; j++) { + bc = wbadword[i - 1]; + gc = wgoodword[j - 1]; + if (bc == gc) { CNT(i, j) = CNT(i - 1, j - 1); - else { + } else { // Use a better score when there is only a case difference. 
if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc)) CNT(i, j) = SCORE_ICASE + CNT(i - 1, j - 1); @@ -6483,13 +6393,8 @@ static int spell_edit_score(slang_T *slang, char_u *badword, char_u *goodword) } if (i > 1 && j > 1) { - if (l_has_mbyte) { - pbc = wbadword[i - 2]; - pgc = wgoodword[j - 2]; - } else { - pbc = badword[i - 2]; - pgc = goodword[j - 2]; - } + pbc = wbadword[i - 2]; + pgc = wgoodword[j - 2]; if (bc == pgc && pbc == gc) { t = SCORE_SWAP + CNT(i - 2, j - 2); if (t < CNT(i, j)) @@ -6519,147 +6424,7 @@ static int spell_edit_score(slang_T *slang, char_u *badword, char_u *goodword) // for multi-byte characters. static int spell_edit_score_limit(slang_T *slang, char_u *badword, char_u *goodword, int limit) { - limitscore_T stack[10]; // allow for over 3 * 2 edits - int stackidx; - int bi, gi; - int bi2, gi2; - int bc, gc; - int score; - int score_off; - int minscore; - int round; - - // Multi-byte characters require a bit more work, use a different function - // to avoid testing "has_mbyte" quite often. - if (has_mbyte) - return spell_edit_score_limit_w(slang, badword, goodword, limit); - - // The idea is to go from start to end over the words. So long as - // characters are equal just continue, this always gives the lowest score. - // When there is a difference try several alternatives. Each alternative - // increases "score" for the edit distance. Some of the alternatives are - // pushed unto a stack and tried later, some are tried right away. At the - // end of the word the score for one alternative is known. The lowest - // possible score is stored in "minscore". - stackidx = 0; - bi = 0; - gi = 0; - score = 0; - minscore = limit + 1; - - for (;; ) { - // Skip over an equal part, score remains the same. 
- for (;; ) { - bc = badword[bi]; - gc = goodword[gi]; - if (bc != gc) // stop at a char that's different - break; - if (bc == NUL) { // both words end - if (score < minscore) - minscore = score; - goto pop; // do next alternative - } - ++bi; - ++gi; - } - - if (gc == NUL) { // goodword ends, delete badword chars - do { - if ((score += SCORE_DEL) >= minscore) - goto pop; // do next alternative - } while (badword[++bi] != NUL); - minscore = score; - } else if (bc == NUL) { // badword ends, insert badword chars - do { - if ((score += SCORE_INS) >= minscore) - goto pop; // do next alternative - } while (goodword[++gi] != NUL); - minscore = score; - } else { // both words continue - // If not close to the limit, perform a change. Only try changes - // that may lead to a lower score than "minscore". - // round 0: try deleting a char from badword - // round 1: try inserting a char in badword - for (round = 0; round <= 1; ++round) { - score_off = score + (round == 0 ? SCORE_DEL : SCORE_INS); - if (score_off < minscore) { - if (score_off + SCORE_EDIT_MIN >= minscore) { - // Near the limit, rest of the words must match. We - // can check that right now, no need to push an item - // onto the stack. - bi2 = bi + 1 - round; - gi2 = gi + round; - while (goodword[gi2] == badword[bi2]) { - if (goodword[gi2] == NUL) { - minscore = score_off; - break; - } - ++bi2; - ++gi2; - } - } else { - // try deleting/inserting a character later - stack[stackidx].badi = bi + 1 - round; - stack[stackidx].goodi = gi + round; - stack[stackidx].score = score_off; - ++stackidx; - } - } - } - - if (score + SCORE_SWAP < minscore) { - // If swapping two characters makes a match then the - // substitution is more expensive, thus there is no need to - // try both. - if (gc == badword[bi + 1] && bc == goodword[gi + 1]) { - // Swap two characters, that is: skip them. 
- gi += 2; - bi += 2; - score += SCORE_SWAP; - continue; - } - } - - // Substitute one character for another which is the same - // thing as deleting a character from both goodword and badword. - // Use a better score when there is only a case difference. - if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc)) - score += SCORE_ICASE; - else { - // For a similar character use SCORE_SIMILAR. - if (slang != NULL - && slang->sl_has_map - && similar_chars(slang, gc, bc)) - score += SCORE_SIMILAR; - else - score += SCORE_SUBST; - } - - if (score < minscore) { - // Do the substitution. - ++gi; - ++bi; - continue; - } - } -pop: - // Get here to try the next alternative, pop it from the stack. - if (stackidx == 0) // stack is empty, finished - break; - - // pop an item from the stack - --stackidx; - gi = stack[stackidx].goodi; - bi = stack[stackidx].badi; - score = stack[stackidx].score; - } - - // When the score goes over "limit" it may actually be much higher. - // Return a very large number to avoid going below the limit when giving a - // bonus. - if (minscore > limit) - return SCORE_MAXMAX; - return minscore; + return spell_edit_score_limit_w(slang, badword, goodword, limit); } // Multi-byte version of spell_edit_score_limit(). 
diff --git a/src/nvim/spellfile.c b/src/nvim/spellfile.c index b415a4635b..90af010164 100644 --- a/src/nvim/spellfile.c +++ b/src/nvim/spellfile.c @@ -1221,18 +1221,18 @@ static int read_sal_section(FILE *fd, slang_T *slang) return ccnt; } - if (has_mbyte) { - // convert the multi-byte strings to wide char strings - smp->sm_lead_w = mb_str2wide(smp->sm_lead); - smp->sm_leadlen = mb_charlen(smp->sm_lead); - if (smp->sm_oneof == NULL) - smp->sm_oneof_w = NULL; - else - smp->sm_oneof_w = mb_str2wide(smp->sm_oneof); - if (smp->sm_to == NULL) - smp->sm_to_w = NULL; - else - smp->sm_to_w = mb_str2wide(smp->sm_to); + // convert the multi-byte strings to wide char strings + smp->sm_lead_w = mb_str2wide(smp->sm_lead); + smp->sm_leadlen = mb_charlen(smp->sm_lead); + if (smp->sm_oneof == NULL) { + smp->sm_oneof_w = NULL; + } else { + smp->sm_oneof_w = mb_str2wide(smp->sm_oneof); + } + if (smp->sm_to == NULL) { + smp->sm_to_w = NULL; + } else { + smp->sm_to_w = mb_str2wide(smp->sm_to); } } @@ -1488,72 +1488,61 @@ static int read_compound(FILE *fd, slang_T *slang, int len) // Returns SP_*ERROR flags when there is something wrong. static int set_sofo(slang_T *lp, char_u *from, char_u *to) { - int i; - - garray_T *gap; char_u *s; char_u *p; - int c; - int *inp; - - if (has_mbyte) { - // Use "sl_sal" as an array with 256 pointers to a list of wide - // characters. The index is the low byte of the character. - // The list contains from-to pairs with a terminating NUL. - // sl_sal_first[] is used for latin1 "from" characters. - gap = &lp->sl_sal; - ga_init(gap, sizeof(int *), 1); - ga_grow(gap, 256); - memset(gap->ga_data, 0, sizeof(int *) * 256); - gap->ga_len = 256; - - // First count the number of items for each list. Temporarily use - // sl_sal_first[] for this. 
- for (p = from, s = to; *p != NUL && *s != NUL; ) { - c = mb_cptr2char_adv((const char_u **)&p); - MB_CPTR_ADV(s); - if (c >= 256) { - lp->sl_sal_first[c & 0xff]++; - } - } - if (*p != NUL || *s != NUL) // lengths differ - return SP_FORMERROR; - // Allocate the lists. - for (i = 0; i < 256; ++i) - if (lp->sl_sal_first[i] > 0) { - p = xmalloc(sizeof(int) * (lp->sl_sal_first[i] * 2 + 1)); - ((int **)gap->ga_data)[i] = (int *)p; - *(int *)p = 0; - } + // Use "sl_sal" as an array with 256 pointers to a list of wide + // characters. The index is the low byte of the character. + // The list contains from-to pairs with a terminating NUL. + // sl_sal_first[] is used for latin1 "from" characters. + garray_T *gap = &lp->sl_sal; + ga_init(gap, sizeof(int *), 1); + ga_grow(gap, 256); + memset(gap->ga_data, 0, sizeof(int *) * 256); + gap->ga_len = 256; + + // First count the number of items for each list. Temporarily use + // sl_sal_first[] for this. + for (p = from, s = to; *p != NUL && *s != NUL; ) { + const int c = mb_cptr2char_adv((const char_u **)&p); + MB_CPTR_ADV(s); + if (c >= 256) { + lp->sl_sal_first[c & 0xff]++; + } + } + if (*p != NUL || *s != NUL) { // lengths differ + return SP_FORMERROR; + } - // Put the characters up to 255 in sl_sal_first[] the rest in a sl_sal - // list. - memset(lp->sl_sal_first, 0, sizeof(salfirst_T) * 256); - for (p = from, s = to; *p != NUL && *s != NUL; ) { - c = mb_cptr2char_adv((const char_u **)&p); - i = mb_cptr2char_adv((const char_u **)&s); - if (c >= 256) { - // Append the from-to chars at the end of the list with - // the low byte. - inp = ((int **)gap->ga_data)[c & 0xff]; - while (*inp != 0) - ++inp; - *inp++ = c; // from char - *inp++ = i; // to char - *inp++ = NUL; // NUL at the end - } else - // mapping byte to char is done in sl_sal_first[] - lp->sl_sal_first[c] = i; + // Allocate the lists. 
+ for (int i = 0; i < 256; i++) { + if (lp->sl_sal_first[i] > 0) { + p = xmalloc(sizeof(int) * (lp->sl_sal_first[i] * 2 + 1)); + ((int **)gap->ga_data)[i] = (int *)p; + *(int *)p = 0; } - } else { - // mapping bytes to bytes is done in sl_sal_first[] - if (STRLEN(from) != STRLEN(to)) - return SP_FORMERROR; + } - for (i = 0; to[i] != NUL; ++i) - lp->sl_sal_first[from[i]] = to[i]; - lp->sl_sal.ga_len = 1; // indicates we have soundfolding + // Put the characters up to 255 in sl_sal_first[] the rest in a sl_sal + // list. + memset(lp->sl_sal_first, 0, sizeof(salfirst_T) * 256); + for (p = from, s = to; *p != NUL && *s != NUL; ) { + const int c = mb_cptr2char_adv((const char_u **)&p); + const int i = mb_cptr2char_adv((const char_u **)&s); + if (c >= 256) { + // Append the from-to chars at the end of the list with + // the low byte. + int *inp = ((int **)gap->ga_data)[c & 0xff]; + while (*inp != 0) { + inp++; + } + *inp++ = c; // from char + *inp++ = i; // to char + *inp++ = NUL; // NUL at the end + } else { + // mapping byte to char is done in sl_sal_first[] + lp->sl_sal_first[c] = i; + } } return 0; @@ -1572,40 +1561,35 @@ static void set_sal_first(slang_T *lp) sfirst[i] = -1; } smp = (salitem_T *)gap->ga_data; - for (int i = 0; i < gap->ga_len; ++i) { - if (has_mbyte) - // Use the lowest byte of the first character. For latin1 it's - // the character, for other encodings it should differ for most - // characters. - c = *smp[i].sm_lead_w & 0xff; - else - c = *smp[i].sm_lead; + for (int i = 0; i < gap->ga_len; i++) { + // Use the lowest byte of the first character. For latin1 it's + // the character, for other encodings it should differ for most + // characters. + c = *smp[i].sm_lead_w & 0xff; if (sfirst[c] == -1) { sfirst[c] = i; - if (has_mbyte) { - int n; - - // Make sure all entries with this byte are following each - // other. Move the ones that are in the wrong position. Do - // keep the same ordering! 
- while (i + 1 < gap->ga_len - && (*smp[i + 1].sm_lead_w & 0xff) == c) - // Skip over entry with same index byte. - ++i; - - for (n = 1; i + n < gap->ga_len; ++n) - if ((*smp[i + n].sm_lead_w & 0xff) == c) { - salitem_T tsal; - - // Move entry with same index byte after the entries - // we already found. - ++i; - --n; - tsal = smp[i + n]; - memmove(smp + i + 1, smp + i, - sizeof(salitem_T) * n); - smp[i] = tsal; - } + + // Make sure all entries with this byte are following each + // other. Move the ones that are in the wrong position. Do + // keep the same ordering! + while (i + 1 < gap->ga_len + && (*smp[i + 1].sm_lead_w & 0xff) == c) { + // Skip over entry with same index byte. + i++; + } + + for (int n = 1; i + n < gap->ga_len; n++) { + if ((*smp[i + n].sm_lead_w & 0xff) == c) { + salitem_T tsal; + + // Move entry with same index byte after the entries + // we already found. + i++; + n--; + tsal = smp[i + n]; + memmove(smp + i + 1, smp + i, sizeof(salitem_T) * n); + smp[i] = tsal; + } } } } @@ -2454,12 +2438,8 @@ static afffile_T *spell_read_aff(spellinfo_T *spin, char_u *fname) // upper-case letter. if (aff_entry->ae_cond != NULL) { char_u buf[MAXLINELEN]; - if (has_mbyte) { - onecap_copy(items[4], buf, true); - aff_entry->ae_cond = getroom_save( - spin, buf); - } else - *aff_entry->ae_cond = c_up; + onecap_copy(items[4], buf, true); + aff_entry->ae_cond = getroom_save(spin, buf); if (aff_entry->ae_cond != NULL) { sprintf((char *)buf, "^%s", aff_entry->ae_cond); @@ -3373,13 +3353,9 @@ store_aff_word ( p = word; if (ae->ae_chop != NULL) { // Skip chop string. 
- if (has_mbyte) { - i = mb_charlen(ae->ae_chop); - for (; i > 0; i--) { - MB_PTR_ADV(p); - } - } else { - p += STRLEN(ae->ae_chop); + i = mb_charlen(ae->ae_chop); + for (; i > 0; i--) { + MB_PTR_ADV(p); } } STRCAT(newword, p); diff --git a/src/nvim/strings.c b/src/nvim/strings.c index 2f5491fda5..81a1a68a94 100644 --- a/src/nvim/strings.c +++ b/src/nvim/strings.c @@ -94,8 +94,8 @@ char_u *vim_strsave_escaped_ext(const char_u *string, const char_u *esc_chars, */ size_t length = 1; // count the trailing NUL for (const char_u *p = string; *p; p++) { - size_t l; - if (has_mbyte && (l = (size_t)(*mb_ptr2len)(p)) > 1) { + const size_t l = (size_t)(utfc_ptr2len(p)); + if (l > 1) { length += l; // count a multibyte char p += l - 1; continue; @@ -108,8 +108,8 @@ char_u *vim_strsave_escaped_ext(const char_u *string, const char_u *esc_chars, char_u *escaped_string = xmalloc(length); char_u *p2 = escaped_string; for (const char_u *p = string; *p; p++) { - size_t l; - if (has_mbyte && (l = (size_t)(*mb_ptr2len)(p)) > 1) { + const size_t l = (size_t)(utfc_ptr2len(p)); + if (l > 1) { memcpy(p2, p, l); p2 += l; p += l - 1; /* skip multibyte char */ @@ -349,7 +349,7 @@ char *strcase_save(const char *const orig, bool upper) // thus it's OK to do another malloc()/free(). 
int newl = utf_char2len(uc); if (newl != l) { - // TODO(philix): use xrealloc() in strup_save() + // TODO(philix): use xrealloc() in strcase_save() char *s = xmalloc(STRLEN(res) + (size_t)(1 + newl - l)); memcpy(s, res, (size_t)(p - res)); STRCPY(s + (p - res) + newl, p + l); diff --git a/src/nvim/syntax.c b/src/nvim/syntax.c index 5ce126a593..2e593e39de 100644 --- a/src/nvim/syntax.c +++ b/src/nvim/syntax.c @@ -2960,11 +2960,7 @@ static int check_keyword_id( char_u *const kwp = line + startcol; int kwlen = 0; do { - if (has_mbyte) { - kwlen += (*mb_ptr2len)(kwp + kwlen); - } else { - kwlen++; - } + kwlen += utfc_ptr2len(kwp + kwlen); } while (vim_iswordp_buf(kwp + kwlen, syn_buf)); if (kwlen > MAXKEYWLEN) { diff --git a/src/nvim/testdir/test_digraph.vim b/src/nvim/testdir/test_digraph.vim index 1792dcc00b..9eea27740d 100644 --- a/src/nvim/testdir/test_digraph.vim +++ b/src/nvim/testdir/test_digraph.vim @@ -479,9 +479,6 @@ endfunc func Test_show_digraph_cp1251() throw 'skipped: Nvim supports ''utf8'' encoding only' - if !has('multi_byte') - return - endif new set encoding=cp1251 call Put_Dig("='") diff --git a/src/nvim/testdir/test_plus_arg_edit.vim b/src/nvim/testdir/test_plus_arg_edit.vim index e91a6e467a..e31680e7b6 100644 --- a/src/nvim/testdir/test_plus_arg_edit.vim +++ b/src/nvim/testdir/test_plus_arg_edit.vim @@ -10,10 +10,6 @@ function Test_edit() endfunction func Test_edit_bad() - if !has('multi_byte') - finish - endif - " Test loading a utf8 file with bad utf8 sequences. 
call writefile(["[\xff][\xc0][\xe2\x89\xf0][\xc2\xc2]"], "Xfile") new diff --git a/src/nvim/testdir/test_search.vim b/src/nvim/testdir/test_search.vim index 5db23c22a8..6824c50112 100644 --- a/src/nvim/testdir/test_search.vim +++ b/src/nvim/testdir/test_search.vim @@ -1053,7 +1053,7 @@ func Test_search_Ctrl_L_combining() " ' ̇' U+0307 Dec:775 COMBINING DOT ABOVE ̇ /\%u307\Z "\u0307" " ' ̣' U+0323 Dec:803 COMBINING DOT BELOW ̣ /\%u323 "\u0323" " Those should also appear on the commandline - if !has('multi_byte') || !exists('+incsearch') + if !exists('+incsearch') return endif call Cmdline3_prep() diff --git a/src/nvim/window.c b/src/nvim/window.c index 3429e3df70..4078cd31ac 100644 --- a/src/nvim/window.c +++ b/src/nvim/window.c @@ -6151,11 +6151,7 @@ file_name_in_line ( // Skip over the "\" in "\ ". ++len; } - if (has_mbyte) { - len += (size_t)(*mb_ptr2len)(ptr + len); - } else { - ++len; - } + len += (size_t)(utfc_ptr2len(ptr + len)); } /* |