From a9c551e5e38f484e9055a930b3feaa9ac65d07be Mon Sep 17 00:00:00 2001 From: VanaIgr Date: Wed, 13 Dec 2023 13:25:48 -0600 Subject: perf: cache breakindent/showbreak width in win_lbr_chartabsize breakindent was recomputed on every call to win_lbr_chartabsize() when the character is past the end of the first row of a wrapped line. Even though the function for computing breakindent cached the last result, reusing the cached value required strcmp of the cached line with the given line. --- src/nvim/drawline.c | 5 ++--- src/nvim/plines.c | 64 +++++++++++++++++++++++++++++++---------------------- src/nvim/plines.h | 9 ++++---- 3 files changed, 44 insertions(+), 34 deletions(-) (limited to 'src') diff --git a/src/nvim/drawline.c b/src/nvim/drawline.c index cead63b88d..cc0fa441ca 100644 --- a/src/nvim/drawline.c +++ b/src/nvim/drawline.c @@ -2083,9 +2083,8 @@ int win_line(win_T *wp, linenr_T lnum, int startrow, int endrow, int col_rows, s char *p = ptr - (mb_off + 1); chartabsize_T cts; - init_chartabsize_arg(&cts, wp, lnum, wlv.vcol, line, p); - // do not want virtual text to be counted here - cts.cts_has_virt_text = false; + // lnum == 0, do not want virtual text to be counted here + init_chartabsize_arg(&cts, wp, 0, wlv.vcol, line, p); wlv.n_extra = win_lbr_chartabsize(&cts, NULL) - 1; clear_chartabsize_arg(&cts); diff --git a/src/nvim/plines.c b/src/nvim/plines.c index c2cf3796a7..48c43e155e 100644 --- a/src/nvim/plines.c +++ b/src/nvim/plines.c @@ -109,8 +109,8 @@ void win_linetabsize_cts(chartabsize_T *cts, colnr_T len) cts->cts_vcol += win_lbr_chartabsize(cts, NULL); } // check for inline virtual text after the end of the line - if (len == MAXCOL && cts->cts_has_virt_text && *cts->cts_ptr == NUL) { - win_lbr_chartabsize(cts, NULL); + if (len == MAXCOL && cts->virt_row >= 0 && *cts->cts_ptr == NUL) { + (void)win_lbr_chartabsize(cts, NULL); cts->cts_vcol += cts->cts_cur_text_width_left + cts->cts_cur_text_width_right; } } @@ -129,14 +129,14 @@ void 
init_chartabsize_arg(chartabsize_T *cts, win_T *wp, linenr_T lnum, colnr_T cts->cts_max_head_vcol = 0; cts->cts_cur_text_width_left = 0; cts->cts_cur_text_width_right = 0; - cts->cts_has_virt_text = false; - cts->cts_row = lnum - 1; + cts->virt_row = -1; + cts->indent_width = INT_MIN; - if (cts->cts_row >= 0 && wp->w_buffer->b_virt_text_inline > 0) { - marktree_itr_get(wp->w_buffer->b_marktree, cts->cts_row, 0, cts->cts_iter); + if (lnum > 0 && wp->w_buffer->b_virt_text_inline > 0) { + marktree_itr_get(wp->w_buffer->b_marktree, lnum - 1, 0, cts->cts_iter); MTKey mark = marktree_itr_current(cts->cts_iter); - if (mark.pos.row == cts->cts_row) { - cts->cts_has_virt_text = true; + if (mark.pos.row == lnum - 1) { + cts->virt_row = lnum - 1; } } } @@ -154,7 +154,7 @@ void clear_chartabsize_arg(chartabsize_T *cts) int lbr_chartabsize(chartabsize_T *cts) { if (!curwin->w_p_lbr && *get_showbreak_value(curwin) == NUL - && !curwin->w_p_bri && !cts->cts_has_virt_text) { + && !curwin->w_p_bri && cts->virt_row < 0) { if (curwin->w_p_wrap) { return win_nolbr_chartabsize(cts, NULL); } @@ -199,9 +199,11 @@ int win_lbr_chartabsize(chartabsize_T *cts, int *headp) cts->cts_cur_text_width_left = 0; cts->cts_cur_text_width_right = 0; + char *const sbr = get_showbreak_value(wp); + // No 'linebreak', 'showbreak' and 'breakindent': return quickly. 
- if (!wp->w_p_lbr && !wp->w_p_bri && *get_showbreak_value(wp) == NUL - && !cts->cts_has_virt_text) { + if (!wp->w_p_lbr && !wp->w_p_bri && *sbr == NUL + && cts->virt_row < 0) { if (wp->w_p_wrap) { return win_nolbr_chartabsize(cts, headp); } @@ -217,12 +219,12 @@ int win_lbr_chartabsize(chartabsize_T *cts, int *headp) } bool is_doublewidth = size == 2 && MB_BYTE2LEN((uint8_t)(*s)) > 1; - if (cts->cts_has_virt_text) { + if (cts->virt_row >= 0) { int tab_size = size; int col = (int)(s - line); while (true) { MTKey mark = marktree_itr_current(cts->cts_iter); - if (mark.pos.row != cts->cts_row || mark.pos.col > col) { + if (mark.pos.row != cts->virt_row || mark.pos.col > col) { break; } else if (mark.pos.col == col) { if (!mt_end(mark) && mark.flags & (MT_FLAG_DECOR_VIRT_TEXT_INLINE)) { @@ -260,7 +262,6 @@ int win_lbr_chartabsize(chartabsize_T *cts, int *headp) // May have to add something for 'breakindent' and/or 'showbreak' // string at the start of a screen line. int head = mb_added; - char *const sbr = get_showbreak_value(wp); // When "size" is 0, no new screen line is started. 
if (size > 0 && wp->w_p_wrap && (*sbr != NUL || wp->w_p_bri)) { int col_off_prev = win_col_off(wp); @@ -277,11 +278,16 @@ int win_lbr_chartabsize(chartabsize_T *cts, int *headp) if (wcol >= width2 && width2 > 0) { wcol %= width2; } - if (*sbr != NUL) { - head_prev += vim_strsize(sbr); - } - if (wp->w_p_bri) { - head_prev += get_breakindent_win(wp, line); + head_prev = cts->indent_width; + if (head_prev == INT_MIN) { + head_prev = 0; + if (*sbr != NUL) { + head_prev += vim_strsize(sbr); + } + if (wp->w_p_bri) { + head_prev += get_breakindent_win(wp, line); + } + cts->indent_width = head_prev; } if (wcol < head_prev) { head_prev -= wcol; @@ -298,12 +304,16 @@ int win_lbr_chartabsize(chartabsize_T *cts, int *headp) if (wcol + size > wp->w_width) { // cells taken by 'showbreak'/'breakindent' halfway current char - int head_mid = 0; - if (*sbr != NUL) { - head_mid += vim_strsize(sbr); - } - if (wp->w_p_bri) { - head_mid += get_breakindent_win(wp, line); + int head_mid = cts->indent_width; + if (head_mid == INT_MIN) { + head_mid = 0; + if (*sbr != NUL) { + head_mid += vim_strsize(sbr); + } + if (wp->w_p_bri) { + head_mid += get_breakindent_win(wp, line); + } + cts->indent_width = head_mid; } if (head_mid > 0 && wcol + size > wp->w_width_inner) { // Calculate effective window width. 
@@ -520,7 +530,7 @@ void getvcol(win_T *wp, pos_T *pos, colnr_T *start, colnr_T *cursor, colnr_T *en && !wp->w_p_lbr && *get_showbreak_value(wp) == NUL && !wp->w_p_bri - && !cts.cts_has_virt_text) { + && cts.virt_row < 0) { while (true) { head = 0; int c = (uint8_t)(*ptr); @@ -800,7 +810,7 @@ int plines_win_nofold(win_T *wp, linenr_T lnum) char *s = ml_get_buf(wp->w_buffer, lnum); chartabsize_T cts; init_chartabsize_arg(&cts, wp, lnum, 0, s, s); - if (*s == NUL && !cts.cts_has_virt_text) { + if (*s == NUL && cts.virt_row < 0) { return 1; // be quick for an empty line } win_linetabsize_cts(&cts, (colnr_T)MAXCOL); diff --git a/src/nvim/plines.h b/src/nvim/plines.h index 38024e622e..86ee7ef53c 100644 --- a/src/nvim/plines.h +++ b/src/nvim/plines.h @@ -12,15 +12,16 @@ typedef struct { win_T *cts_win; char *cts_line; ///< start of the line char *cts_ptr; ///< current position in line - int cts_row; + int cts_vcol; ///< virtual column at current position + int indent_width; ///< width of showbreak and breakindent on wrapped lines + /// INT_MIN if not yet calculated - bool cts_has_virt_text; ///< true if if there is inline virtual text + int virt_row; ///< line number, -1 if no virtual text int cts_cur_text_width_left; ///< width of virtual text left of cursor int cts_cur_text_width_right; ///< width of virtual text right of cursor - MarkTreeIter cts_iter[1]; - int cts_vcol; ///< virtual column at current position int cts_max_head_vcol; ///< see win_lbr_chartabsize() + MarkTreeIter cts_iter[1]; } chartabsize_T; #ifdef INCLUDE_GENERATED_DECLARATIONS -- cgit From 2f2f12122f3883dc6faf25751e176646f044db77 Mon Sep 17 00:00:00 2001 From: VanaIgr Date: Wed, 13 Dec 2023 13:54:28 -0600 Subject: perf: remove loop for computing last position in getvcol() The function computed the start of the multibyte character and iterated until the current character reached this position. 
The loop at the start of the function handled the case where the index passed to the function was past the end of the line. This loop can be removed if the function instead compares the next position against the end position and exits the loop if it is greater than the end position. --- src/nvim/plines.c | 30 +++++++++--------------------- 1 file changed, 9 insertions(+), 21 deletions(-) (limited to 'src') diff --git a/src/nvim/plines.c b/src/nvim/plines.c index 48c43e155e..13b580b142 100644 --- a/src/nvim/plines.c +++ b/src/nvim/plines.c @@ -493,7 +493,6 @@ static int virt_text_cursor_off(chartabsize_T *cts, bool on_NUL) void getvcol(win_T *wp, pos_T *pos, colnr_T *start, colnr_T *cursor, colnr_T *end) { char *ptr; // points to current char - char *posptr; // points to char at pos->col int incr; int head; colnr_T *vts = wp->w_buffer->b_p_vts_array; @@ -502,21 +501,10 @@ void getvcol(win_T *wp, pos_T *pos, colnr_T *start, colnr_T *cursor, colnr_T *en colnr_T vcol = 0; char *line = ptr = ml_get_buf(wp->w_buffer, pos->lnum); // start of the line - if (pos->col == MAXCOL) { - // continue until the NUL - posptr = NULL; - } else { - // In a few cases the position can be beyond the end of the line. 
- for (colnr_T i = 0; i < pos->col; i++) { - if (ptr[i] == NUL) { - pos->col = i; - break; - } - } - posptr = ptr + pos->col; - posptr -= utf_head_off(line, posptr); + uintptr_t last_pos = (uintptr_t)(ptr + pos->col); + if (last_pos < (uintptr_t)ptr) { + last_pos = UINTPTR_MAX; // unsigned overflow } - chartabsize_T cts; bool on_NUL = false; init_chartabsize_arg(&cts, wp, pos->lnum, 0, line, line); @@ -566,13 +554,13 @@ void getvcol(win_T *wp, pos_T *pos, colnr_T *start, colnr_T *cursor, colnr_T *en } } - if ((posptr != NULL) && (ptr >= posptr)) { - // character at pos->col + char *const next = ptr + utfc_ptr2len(ptr); + if ((uintptr_t)next > last_pos) { break; } + ptr = next; vcol += incr; - MB_PTR_ADV(ptr); } } else { while (true) { @@ -589,13 +577,13 @@ void getvcol(win_T *wp, pos_T *pos, colnr_T *start, colnr_T *cursor, colnr_T *en break; } - if ((posptr != NULL) && (cts.cts_ptr >= posptr)) { - // character at pos->col + char *const next = cts.cts_ptr + utfc_ptr2len(cts.cts_ptr); + if ((uintptr_t)next > last_pos) { break; } + cts.cts_ptr = next; cts.cts_vcol += incr; - MB_PTR_ADV(cts.cts_ptr); } vcol = cts.cts_vcol; ptr = cts.cts_ptr; -- cgit From b5653984e5de514410b5654d2a9b92bdcb9eedf3 Mon Sep 17 00:00:00 2001 From: VanaIgr Date: Sun, 17 Dec 2023 16:48:27 -0600 Subject: perf: don't decode utf8 character multiple times in getvcol() The optimized virtual column calculation loop in getvcol() was decoding the current character twice: once in ptr2cells() and the second time in utfc_ptr2len(). For combining characters, they were decoded up to 2 times in utfc_ptr2len(). Additionally, the function used to decode the character could be further optimised. 
--- src/nvim/arabic.c | 1 + src/nvim/macros_defs.h | 5 +- src/nvim/mbyte.c | 170 ++++++++++++++++++++++++++++++++++++++----------- src/nvim/mbyte.h | 66 +++++++++++++++++++ src/nvim/mbyte_defs.h | 11 ++++ src/nvim/plines.c | 38 +++++------ 6 files changed, 230 insertions(+), 61 deletions(-) (limited to 'src') diff --git a/src/nvim/arabic.c b/src/nvim/arabic.c index 665e61c277..4587415c3b 100644 --- a/src/nvim/arabic.c +++ b/src/nvim/arabic.c @@ -257,6 +257,7 @@ bool arabic_maycombine(int two) } /// Check whether we are dealing with Arabic combining characters. +/// Returns false for negative values. /// Note: these are NOT really composing characters! /// /// @param one First character. diff --git a/src/nvim/macros_defs.h b/src/nvim/macros_defs.h index a0dcafab95..67da29031c 100644 --- a/src/nvim/macros_defs.h +++ b/src/nvim/macros_defs.h @@ -111,10 +111,13 @@ #endif #if defined(__clang__) || defined(__GNUC__) +# define EXPECT(cond, value) __builtin_expect((cond), (value)) # define UNREACHABLE __builtin_unreachable() -#elif defined(_MSVC_VER) +#elif defined(_MSC_VER) +# define EXPECT(cond, value) (cond) # define UNREACHABLE __assume(false) #else +# define EXPECT(cond, value) (cond) # define UNREACHABLE #endif diff --git a/src/nvim/mbyte.c b/src/nvim/mbyte.c index 26c95c698f..b1f7b59b21 100644 --- a/src/nvim/mbyte.c +++ b/src/nvim/mbyte.c @@ -528,6 +528,74 @@ int utf_ptr2cells(const char *p) return 1; } +/// Convert a UTF-8 byte sequence to a character number. +/// Doesn't handle ascii! only multibyte and illegal sequences. +/// +/// @param[in] p String to convert. +/// @param[in] len Length of the character in bytes, 0 or 1 if illegal. +/// +/// @return Unicode codepoint. A negative value When the sequence is illegal. 
+int32_t utf_ptr2CharInfo_impl(uint8_t const *p, uintptr_t const len) + FUNC_ATTR_PURE FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT +{ +// uint8_t is a reminder for clang to use smaller cmp +#define CHECK \ + do { \ + if (EXPECT((uint8_t)(cur & 0xC0U) != 0x80U, false)) { \ + return -1; \ + } \ + } while (0) + + static uint32_t const corrections[] = { + (1U << 31), // invalid - set invalid bits (safe to add as first 2 bytes + (1U << 31), // won't affect highest bit in normal ret) + -(0x80U + (0xC0U << 6)), // multibyte - subtract added UTF8 bits (1..10xxx and 10xxx) + -(0x80U + (0x80U << 6) + (0xE0U << 12)), + -(0x80U + (0x80U << 6) + (0x80U << 12) + (0xF0U << 18)), + -(0x80U + (0x80U << 6) + (0x80U << 12) + (0x80U << 18) + (0xF8U << 24)), + -(0x80U + (0x80U << 6) + (0x80U << 12) + (0x80U << 18) + (0x80U << 24)), // + (0xFCU << 30) + }; + + // len is 0-6, but declared uintptr_t to avoid zeroing out upper bits + uint32_t const corr = corrections[len]; + uint8_t cur; + + // reading second byte unconditionally, safe for invalid + // as it cannot be the last byte, not safe for ascii + uint32_t code_point = ((uint32_t)p[0] << 6) + (cur = p[1]); + CHECK; + if ((uint32_t)len < 3) { + goto ret; // len == 0, 1, 2 + } + + code_point = (code_point << 6) + (cur = p[2]); + CHECK; + if ((uint32_t)len == 3) { + goto ret; + } + + code_point = (code_point << 6) + (cur = p[3]); + CHECK; + if ((uint32_t)len == 4) { + goto ret; + } + + code_point = (code_point << 6) + (cur = p[4]); + CHECK; + if ((uint32_t)len == 5) { + goto ret; + } + + code_point = (code_point << 6) + (cur = p[5]); + CHECK; + // len == 6 + +ret: + return (int32_t)(code_point + corr); + +#undef CHECK +} + /// Like utf_ptr2cells(), but limit string length to "size". /// For an empty string or truncated character returns 1. int utf_ptr2cells_len(const char *p, int size) @@ -597,45 +665,62 @@ size_t mb_string2cells_len(const char *str, size_t size) /// /// @return Unicode codepoint or byte value. 
int utf_ptr2char(const char *const p_in) - FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT + FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL { uint8_t *p = (uint8_t *)p_in; - if (p[0] < 0x80) { // Be quick for ASCII. - return p[0]; + + uint32_t const v0 = p[0]; + if (EXPECT(v0 < 0x80U, true)) { // Be quick for ASCII. + return (int)v0; } - const uint8_t len = utf8len_tab_zero[p[0]]; - if (len > 1 && (p[1] & 0xc0) == 0x80) { - if (len == 2) { - return ((p[0] & 0x1f) << 6) + (p[1] & 0x3f); - } - if ((p[2] & 0xc0) == 0x80) { - if (len == 3) { - return (((p[0] & 0x0f) << 12) + ((p[1] & 0x3f) << 6) - + (p[2] & 0x3f)); - } - if ((p[3] & 0xc0) == 0x80) { - if (len == 4) { - return (((p[0] & 0x07) << 18) + ((p[1] & 0x3f) << 12) - + ((p[2] & 0x3f) << 6) + (p[3] & 0x3f)); - } - if ((p[4] & 0xc0) == 0x80) { - if (len == 5) { - return (((p[0] & 0x03) << 24) + ((p[1] & 0x3f) << 18) - + ((p[2] & 0x3f) << 12) + ((p[3] & 0x3f) << 6) - + (p[4] & 0x3f)); - } - if ((p[5] & 0xc0) == 0x80 && len == 6) { - return (((p[0] & 0x01) << 30) + ((p[1] & 0x3f) << 24) - + ((p[2] & 0x3f) << 18) + ((p[3] & 0x3f) << 12) - + ((p[4] & 0x3f) << 6) + (p[5] & 0x3f)); - } - } - } - } + const uint8_t len = utf8len_tab[v0]; + if (EXPECT(len < 2, false)) { + return (int)v0; } - // Illegal value: just return the first byte. 
- return p[0]; + +#define CHECK(v) \ + do { \ + if (EXPECT((uint8_t)((v) & 0xC0U) != 0x80U, false)) { \ + return (int)v0; \ + } \ + } while (0) +#define LEN_RETURN(len_v, result) \ + do { \ + if (len == (len_v)) { \ + return (int)(result); \ + } \ + } while (0) +#define S(s) ((uint32_t)0x80U << (s)) + + uint32_t const v1 = p[1]; + CHECK(v1); + LEN_RETURN(2, (v0 << 6) + v1 - ((0xC0U << 6) + S(0))); + + uint32_t const v2 = p[2]; + CHECK(v2); + LEN_RETURN(3, (v0 << 12) + (v1 << 6) + v2 - ((0xE0U << 12) + S(6) + S(0))); + + uint32_t const v3 = p[3]; + CHECK(v3); + LEN_RETURN(4, (v0 << 18) + (v1 << 12) + (v2 << 6) + v3 + - ((0xF0U << 18) + S(12) + S(6) + S(0))); + + uint32_t const v4 = p[4]; + CHECK(v4); + LEN_RETURN(5, (v0 << 24) + (v1 << 18) + (v2 << 12) + (v3 << 6) + v4 + - ((0xF8U << 24) + S(18) + S(12) + S(6) + S(0))); + + uint32_t const v5 = p[5]; + CHECK(v5); + // len == 6 + return (int)((v0 << 30) + (v1 << 24) + (v2 << 18) + (v3 << 12) + (v4 << 6) + v5 + // - (0xFCU << 30) + - (S(24) + S(18) + S(12) + S(6) + S(0))); + +#undef S +#undef CHECK +#undef LEN_RETURN } // Convert a UTF-8 byte sequence to a wide character. @@ -722,6 +807,16 @@ bool utf_composinglike(const char *p1, const char *p2) return arabic_combine(utf_ptr2char(p1), c2); } +/// Check if the next character is a composing character when it +/// comes after the first. For Arabic sometimes "ab" is replaced with "c", which +/// behaves like a composing character. +/// returns false for negative values +bool utf_char_composinglike(int32_t const first, int32_t const next) + FUNC_ATTR_PURE +{ + return utf_iscomposing(next) || arabic_combine(first, next); +} + /// Get the screen char at the beginning of a string /// /// Caller is expected to check for things like unprintable chars etc @@ -988,9 +1083,10 @@ int utf_char2bytes(const int c, char *const buf) } } -// Return true if "c" is a composing UTF-8 character. This means it will be -// drawn on top of the preceding character. 
-// Based on code from Markus Kuhn. +/// Return true if "c" is a composing UTF-8 character. +/// This means it will be drawn on top of the preceding character. +/// Based on code from Markus Kuhn. +/// Returns false for negative values. bool utf_iscomposing(int c) { return intable(combining, ARRAY_SIZE(combining), c); diff --git a/src/nvim/mbyte.h b/src/nvim/mbyte.h index 2fb353de9e..68acc5075e 100644 --- a/src/nvim/mbyte.h +++ b/src/nvim/mbyte.h @@ -6,6 +6,7 @@ #include "nvim/cmdexpand_defs.h" // IWYU pragma: keep #include "nvim/eval/typval_defs.h" // IWYU pragma: keep +#include "nvim/macros_defs.h" #include "nvim/mbyte_defs.h" // IWYU pragma: keep #include "nvim/types_defs.h" // IWYU pragma: keep @@ -13,6 +14,10 @@ # include "mbyte.h.generated.h" #endif +enum { + kInvalidByteCells = 4, +}; + // Return byte length of character that starts with byte "b". // Returns 1 for a single-byte character. // MB_BYTE2LEN_CHECK() can be used to count a special key as one byte. @@ -44,3 +49,64 @@ extern const uint8_t utf8len_tab[256]; // multi-byte characters if needed. Only use with "p" > "s" ! #define MB_PTR_BACK(s, p) \ (p -= utf_head_off((char *)(s), (char *)(p) - 1) + 1) + +static inline CharInfo utf_ptr2CharInfo(char const *p_in) + REAL_FATTR_NONNULL_ALL REAL_FATTR_PURE REAL_FATTR_WARN_UNUSED_RESULT REAL_FATTR_ALWAYS_INLINE; + +/// Convert a UTF-8 byte sequence to a Unicode code point. +/// Handles ascii, multibyte sequences and illegal sequences. +/// +/// @param[in] p_in String to convert. +/// +/// @return information about the character. When the sequence is illegal, +/// 'value' is negative, 'len' is 1. 
+static inline CharInfo utf_ptr2CharInfo(char const *const p_in) +{ + uint8_t const *const p = (uint8_t const *)p_in; + uint8_t const first = *p; + if (first < 0x80) { + return (CharInfo){ .value = first, .len = 1 }; + } else { + int len = utf8len_tab[first]; + int32_t const code_point = utf_ptr2CharInfo_impl(p, (uintptr_t)len); + if (code_point < 0) { + len = 1; + } + return (CharInfo){ .value = code_point, .len = len }; + } +} + +static inline StrCharInfo utfc_next(StrCharInfo cur) + REAL_FATTR_NONNULL_ALL REAL_FATTR_ALWAYS_INLINE REAL_FATTR_PURE; + +/// Return information about the next character. +/// Composing and combining characters are +/// considered a part of the current character. +/// +/// @param[in] cur Pointer to the current character. Must not point to NUL +/// @param[in] cur_char Decoded charater at 'cur'. +static inline StrCharInfo utfc_next(StrCharInfo cur) +{ + int32_t prev_code = cur.chr.value; + uint8_t *next = (uint8_t *)(cur.ptr + cur.chr.len); + + while (true) { + if (EXPECT(*next < 0x80U, true)) { + return (StrCharInfo){ + .ptr = (char *)next, + .chr = (CharInfo){ .value = *next, .len = 1 }, + }; + } + uint8_t const next_len = utf8len_tab[*next]; + int32_t const next_code = utf_ptr2CharInfo_impl(next, (uintptr_t)next_len); + if (!utf_char_composinglike(prev_code, next_code)) { + return (StrCharInfo){ + .ptr = (char *)next, + .chr = (CharInfo){ .value = next_code, .len = (next_code < 0 ? 1 : next_len) }, + }; + } + + prev_code = next_code; + next += next_len; + } +} diff --git a/src/nvim/mbyte_defs.h b/src/nvim/mbyte_defs.h index efb4f558a6..97aa1a638b 100644 --- a/src/nvim/mbyte_defs.h +++ b/src/nvim/mbyte_defs.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include "nvim/iconv_defs.h" @@ -55,3 +56,13 @@ typedef struct { bool vc_fail; ///< What to do with invalid characters: if true, fail, ///< otherwise use '?'. 
} vimconv_T; + +typedef struct { + int32_t value; ///< code point + int len; ///< length in bytes +} CharInfo; + +typedef struct { + char *ptr; ///< pointer to the first byte of the character + CharInfo chr; ///< the character +} StrCharInfo; diff --git a/src/nvim/plines.c b/src/nvim/plines.c index 13b580b142..1fa2812dab 100644 --- a/src/nvim/plines.c +++ b/src/nvim/plines.c @@ -511,55 +511,47 @@ void getvcol(win_T *wp, pos_T *pos, colnr_T *start, colnr_T *cursor, colnr_T *en cts.cts_max_head_vcol = -1; // This function is used very often, do some speed optimizations. - // When 'list', 'linebreak', 'showbreak' and 'breakindent' are not set + // When 'linebreak', 'showbreak' and 'breakindent' are not set // and there are no virtual text use a simple loop. - // Also use this when 'list' is set but tabs take their normal size. - if ((!wp->w_p_list || (wp->w_p_lcs_chars.tab1 != NUL)) - && !wp->w_p_lbr - && *get_showbreak_value(wp) == NUL - && !wp->w_p_bri - && cts.virt_row < 0) { + if (!wp->w_p_lbr && !wp->w_p_bri && cts.virt_row < 0 && *get_showbreak_value(wp) == NUL) { + bool const special_tab = !wp->w_p_list || wp->w_p_lcs_chars.tab1 != NUL; + CharInfo cur_char = utf_ptr2CharInfo(ptr); while (true) { head = 0; - int c = (uint8_t)(*ptr); - // make sure we don't go past the end of the line - if (c == NUL) { + if (cur_char.value == 0 && cur_char.len == 1) { // NUL at end of line only takes one column incr = 1; break; } // A tab gets expanded, depending on the current column - if (c == TAB) { + if (cur_char.value == TAB && special_tab) { incr = tabstop_padding(vcol, ts, vts); } else { - // For utf-8, if the byte is >= 0x80, need to look at - // further bytes to find the cell width. 
- if (c >= 0x80) { - incr = utf_ptr2cells(ptr); + if (cur_char.value < 0) { + incr = kInvalidByteCells; } else { - incr = byte2cells(c); + incr = char2cells(cur_char.value); } // If a double-cell char doesn't fit at the end of a line // it wraps to the next line, it's like this char is three // cells wide. - if ((incr == 2) - && wp->w_p_wrap - && (MB_BYTE2LEN((uint8_t)(*ptr)) > 1) - && in_win_border(wp, vcol)) { + if (incr == 2 && cur_char.value >= 0x80 + && wp->w_p_wrap && in_win_border(wp, vcol)) { incr++; head = 1; } } - char *const next = ptr + utfc_ptr2len(ptr); - if ((uintptr_t)next > last_pos) { + StrCharInfo const next_char = utfc_next((StrCharInfo){ ptr, cur_char }); + if ((uintptr_t)next_char.ptr > last_pos) { break; } - ptr = next; + cur_char = next_char.chr; + ptr = next_char.ptr; vcol += incr; } } else { -- cgit From cdf848a314bf91a0c87c717f9a44742dea877515 Mon Sep 17 00:00:00 2001 From: VanaIgr Date: Mon, 18 Dec 2023 20:57:04 -0600 Subject: perf: reuse fast character size calculation algorithm from getvcol() --- src/nvim/cursor.c | 21 +-- src/nvim/drawline.c | 45 ++--- src/nvim/edit.c | 91 +++++----- src/nvim/getchar.c | 22 +-- src/nvim/indent.c | 67 ++++---- src/nvim/mbyte.h | 8 + src/nvim/mouse.c | 21 +-- src/nvim/ops.c | 132 +++++++-------- src/nvim/plines.c | 475 +++++++++++++++++++++++----------------------------- src/nvim/plines.h | 96 +++++++++-- 10 files changed, 499 insertions(+), 479 deletions(-) (limited to 'src') diff --git a/src/nvim/cursor.c b/src/nvim/cursor.c index d8a63c1d7b..6c0a81838f 100644 --- a/src/nvim/cursor.c +++ b/src/nvim/cursor.c @@ -141,17 +141,18 @@ static int coladvance2(pos_T *pos, bool addspaces, bool finetune, colnr_T wcol_a } } - chartabsize_T cts; - init_chartabsize_arg(&cts, curwin, pos->lnum, 0, line, line); - while (cts.cts_vcol <= wcol && *cts.cts_ptr != NUL) { - // Count a tab for what it's worth (if list mode not on) - csize = win_lbr_chartabsize(&cts, &head); - MB_PTR_ADV(cts.cts_ptr); - cts.cts_vcol += 
csize; + CharsizeArg arg; + CSType cstype = init_charsize_arg(&arg, curwin, pos->lnum, line); + StrCharInfo ci = utf_ptr2StrCharInfo(line); + col = 0; + while (col <= wcol && *ci.ptr != NUL) { + CharSize cs = win_charsize(cstype, col, ci.ptr, ci.chr.value, &arg); + csize = cs.width; + head = cs.head; + col += cs.width; + ci = utfc_next(ci); } - col = cts.cts_vcol; - idx = (int)(cts.cts_ptr - line); - clear_chartabsize_arg(&cts); + idx = (int)(ci.ptr - line); // Handle all the special cases. The virtual_active() check // is needed to ensure that a virtual position off the end of diff --git a/src/nvim/drawline.c b/src/nvim/drawline.c index cc0fa441ca..616fd24460 100644 --- a/src/nvim/drawline.c +++ b/src/nvim/drawline.c @@ -1336,30 +1336,30 @@ int win_line(win_T *wp, linenr_T lnum, int startrow, int endrow, int col_rows, s if (start_col > 0 && col_rows == 0) { char *prev_ptr = ptr; - chartabsize_T cts; - int charsize = 0; - int head = 0; - - init_chartabsize_arg(&cts, wp, lnum, wlv.vcol, line, ptr); - cts.cts_max_head_vcol = start_col; - while (cts.cts_vcol < start_col && *cts.cts_ptr != NUL) { - head = 0; - charsize = win_lbr_chartabsize(&cts, &head); - cts.cts_vcol += charsize; - prev_ptr = cts.cts_ptr; - MB_PTR_ADV(cts.cts_ptr); + CharSize cs = { 0 }; + + CharsizeArg arg; + CSType cstype = init_charsize_arg(&arg, wp, lnum, line); + arg.max_head_vcol = start_col; + int vcol = wlv.vcol; + StrCharInfo ci = utf_ptr2StrCharInfo(ptr); + while (vcol < start_col && *ci.ptr != NUL) { + cs = win_charsize(cstype, vcol, ci.ptr, ci.chr.value, &arg); + vcol += cs.width; + prev_ptr = ci.ptr; + ci = utfc_next(ci); if (wp->w_p_list) { - in_multispace = *prev_ptr == ' ' && (*cts.cts_ptr == ' ' + in_multispace = *prev_ptr == ' ' && (*ci.ptr == ' ' || (prev_ptr > line && prev_ptr[-1] == ' ')); if (!in_multispace) { multispace_pos = 0; - } else if (cts.cts_ptr >= line + leadcol + } else if (ci.ptr >= line + leadcol && wp->w_p_lcs_chars.multispace != NULL) { multispace_pos++; if 
(wp->w_p_lcs_chars.multispace[multispace_pos] == NUL) { multispace_pos = 0; } - } else if (cts.cts_ptr < line + leadcol + } else if (ci.ptr < line + leadcol && wp->w_p_lcs_chars.leadmultispace != NULL) { multispace_pos++; if (wp->w_p_lcs_chars.leadmultispace[multispace_pos] == NUL) { @@ -1368,9 +1368,10 @@ int win_line(win_T *wp, linenr_T lnum, int startrow, int endrow, int col_rows, s } } } - wlv.vcol = cts.cts_vcol; - ptr = cts.cts_ptr; - clear_chartabsize_arg(&cts); + wlv.vcol = vcol; + ptr = ci.ptr; + int charsize = cs.width; + int head = cs.head; // When: // - 'cuc' is set, or @@ -2081,12 +2082,12 @@ int win_line(win_T *wp, linenr_T lnum, int startrow, int endrow, int col_rows, s && vim_isbreak(mb_c) && !vim_isbreak((uint8_t)(*ptr))) { int mb_off = utf_head_off(line, ptr - 1); char *p = ptr - (mb_off + 1); - chartabsize_T cts; + CharsizeArg arg; // lnum == 0, do not want virtual text to be counted here - init_chartabsize_arg(&cts, wp, 0, wlv.vcol, line, p); - wlv.n_extra = win_lbr_chartabsize(&cts, NULL) - 1; - clear_chartabsize_arg(&cts); + CSType cstype = init_charsize_arg(&arg, wp, 0, line); + wlv.n_extra = win_charsize(cstype, wlv.vcol, p, utf_ptr2CharInfo(p).value, + &arg).width - 1; if (on_last_col && mb_c != TAB) { // Do not continue search/match highlighting over the diff --git a/src/nvim/edit.c b/src/nvim/edit.c index 6f05ba5905..0e320056bb 100644 --- a/src/nvim/edit.c +++ b/src/nvim/edit.c @@ -1679,33 +1679,37 @@ void change_indent(int type, int amount, int round, int replaced, bool call_chan } else { // Compute the screen column where the cursor should be. vcol = get_indent() - vcol; - curwin->w_virtcol = (colnr_T)((vcol < 0) ? 0 : vcol); + int const end_vcol = (colnr_T)((vcol < 0) ? 0 : vcol); + curwin->w_virtcol = end_vcol; // Advance the cursor until we reach the right screen column. 
- int last_vcol = 0; - char *ptr = get_cursor_line_ptr(); - chartabsize_T cts; - init_chartabsize_arg(&cts, curwin, 0, 0, ptr, ptr); - while (cts.cts_vcol <= (int)curwin->w_virtcol) { - last_vcol = cts.cts_vcol; - if (cts.cts_vcol > 0) { - MB_PTR_ADV(cts.cts_ptr); - } - if (*cts.cts_ptr == NUL) { - break; + new_cursor_col = 0; + char *const line = get_cursor_line_ptr(); + vcol = 0; + if (*line != NUL) { + CharsizeArg arg; + CSType cstype = init_charsize_arg(&arg, curwin, 0, line); + StrCharInfo ci = utf_ptr2StrCharInfo(line); + while (true) { + int next_vcol = vcol + win_charsize(cstype, vcol, ci.ptr, ci.chr.value, &arg).width; + if (next_vcol > end_vcol) { + break; + } + vcol = next_vcol; + ci = utfc_next(ci); + if (*ci.ptr == NUL) { + break; + } } - cts.cts_vcol += lbr_chartabsize(&cts); + new_cursor_col = (int)(ci.ptr - line); } - vcol = last_vcol; - new_cursor_col = (int)(cts.cts_ptr - cts.cts_line); - clear_chartabsize_arg(&cts); // May need to insert spaces to be able to position the cursor on // the right screen column. if (vcol != (int)curwin->w_virtcol) { curwin->w_cursor.col = (colnr_T)new_cursor_col; size_t i = (size_t)(curwin->w_virtcol - vcol); - ptr = xmallocz(i); + char *ptr = xmallocz(i); memset(ptr, ' ', i); new_cursor_col += (int)i; ins_str(ptr); @@ -4347,14 +4351,16 @@ static bool ins_tab(void) getvcol(curwin, cursor, &want_vcol, NULL, NULL); char *tab = "\t"; - chartabsize_T cts; - init_chartabsize_arg(&cts, curwin, 0, vcol, tab, tab); + int32_t tab_v = (uint8_t)(*tab); + + CharsizeArg arg; + CSType cstype = init_charsize_arg(&arg, curwin, 0, tab); // Use as many TABs as possible. Beware of 'breakindent', 'showbreak' // and 'linebreak' adding extra virtual columns. 
while (ascii_iswhite(*ptr)) { - int i = lbr_chartabsize(&cts); - if (cts.cts_vcol + i > want_vcol) { + int i = win_charsize(cstype, vcol, tab, tab_v, &arg).width; + if (vcol + i > want_vcol) { break; } if (*ptr != TAB) { @@ -4369,23 +4375,18 @@ static bool ins_tab(void) } fpos.col++; ptr++; - cts.cts_vcol += i; + vcol += i; } - vcol = cts.cts_vcol; - clear_chartabsize_arg(&cts); if (change_col >= 0) { int repl_off = 0; // Skip over the spaces we need. - init_chartabsize_arg(&cts, curwin, 0, vcol, ptr, ptr); - while (cts.cts_vcol < want_vcol && *cts.cts_ptr == ' ') { - cts.cts_vcol += lbr_chartabsize(&cts); - cts.cts_ptr++; + cstype = init_charsize_arg(&arg, curwin, 0, ptr); + while (vcol < want_vcol && *ptr == ' ') { + vcol += win_charsize(cstype, vcol, ptr, (uint8_t)(' '), &arg).width; + ptr++; repl_off++; } - ptr = cts.cts_ptr; - vcol = cts.cts_vcol; - clear_chartabsize_arg(&cts); if (vcol > want_vcol) { // Must have a char with 'showbreak' just before it. @@ -4556,8 +4557,6 @@ static int ins_digraph(void) // Returns the char to be inserted, or NUL if none found. 
int ins_copychar(linenr_T lnum) { - char *ptr; - if (lnum < 1 || lnum > curbuf->b_ml.ml_line_count) { vim_beep(BO_COPY); return NUL; @@ -4565,24 +4564,22 @@ int ins_copychar(linenr_T lnum) // try to advance to the cursor column validate_virtcol(); + int const end_vcol = curwin->w_virtcol; char *line = ml_get(lnum); - char *prev_ptr = line; - - chartabsize_T cts; - init_chartabsize_arg(&cts, curwin, lnum, 0, line, line); - while (cts.cts_vcol < curwin->w_virtcol && *cts.cts_ptr != NUL) { - prev_ptr = cts.cts_ptr; - cts.cts_vcol += lbr_chartabsize_adv(&cts); - } - if (cts.cts_vcol > curwin->w_virtcol) { - ptr = prev_ptr; - } else { - ptr = cts.cts_ptr; + CharsizeArg arg; + CSType cstype = init_charsize_arg(&arg, curwin, lnum, line); + StrCharInfo ci = utf_ptr2StrCharInfo(line); + int vcol = 0; + while (vcol < end_vcol && *ci.ptr != NUL) { + vcol += win_charsize(cstype, vcol, ci.ptr, ci.chr.value, &arg).width; + if (vcol > end_vcol) { + break; + } + ci = utfc_next(ci); } - clear_chartabsize_arg(&cts); - int c = utf_ptr2char(ptr); + int c = ci.chr.value < 0 ? 
(uint8_t)(*ci.ptr) : ci.chr.value; if (c == NUL) { vim_beep(BO_COPY); } diff --git a/src/nvim/getchar.c b/src/nvim/getchar.c index 23937a6bb5..6515cc84da 100644 --- a/src/nvim/getchar.c +++ b/src/nvim/getchar.c @@ -2502,20 +2502,22 @@ static int vgetorpeek(bool advance) // we are expecting to truncate the trailing // white-space, so find the last non-white // character -- webb - if (did_ai - && *skipwhite(get_cursor_line_ptr() + curwin->w_cursor.col) == NUL) { + if (did_ai && *skipwhite(get_cursor_line_ptr() + curwin->w_cursor.col) == NUL) { curwin->w_wcol = 0; ptr = get_cursor_line_ptr(); - chartabsize_T cts; - init_chartabsize_arg(&cts, curwin, curwin->w_cursor.lnum, 0, ptr, ptr); - while (cts.cts_ptr < ptr + curwin->w_cursor.col) { - if (!ascii_iswhite(*cts.cts_ptr)) { - curwin->w_wcol = cts.cts_vcol; + char *endptr = ptr + curwin->w_cursor.col; + + CharsizeArg arg; + CSType cstype = init_charsize_arg(&arg, curwin, curwin->w_cursor.lnum, ptr); + StrCharInfo ci = utf_ptr2StrCharInfo(ptr); + int vcol = 0; + while (ci.ptr < endptr) { + if (!ascii_iswhite(ci.chr.value)) { + curwin->w_wcol = vcol; } - cts.cts_vcol += lbr_chartabsize(&cts); - cts.cts_ptr += utfc_ptr2len(cts.cts_ptr); + vcol += win_charsize(cstype, vcol, ci.ptr, ci.chr.value, &arg).width; + ci = utfc_next(ci); } - clear_chartabsize_arg(&cts); curwin->w_wrow = curwin->w_cline_row + curwin->w_wcol / curwin->w_width_inner; diff --git a/src/nvim/indent.c b/src/nvim/indent.c index 080f6f986a..4899a025e5 100644 --- a/src/nvim/indent.c +++ b/src/nvim/indent.c @@ -1246,18 +1246,19 @@ int get_lisp_indent(void) curwin->w_cursor.col = pos->col; colnr_T col = pos->col; - char *that = get_cursor_line_ptr(); + char *line = get_cursor_line_ptr(); - char *line = that; - chartabsize_T cts; - init_chartabsize_arg(&cts, curwin, pos->lnum, 0, line, line); - while (*cts.cts_ptr != NUL && col > 0) { - cts.cts_vcol += lbr_chartabsize_adv(&cts); + CharsizeArg arg; + CSType cstype = init_charsize_arg(&arg, curwin, pos->lnum, 
line); + + StrCharInfo sci = utf_ptr2StrCharInfo(line); + amount = 0; + while (*sci.ptr != NUL && col > 0) { + amount += win_charsize(cstype, amount, sci.ptr, sci.chr.value, &arg).width; + sci = utfc_next(sci); col--; } - amount = cts.cts_vcol; - that = cts.cts_ptr; - clear_chartabsize_arg(&cts); + char *that = sci.ptr; // Some keywords require "body" indenting rules (the // non-standard-lisp ones are Scheme special forms): @@ -1272,15 +1273,10 @@ int get_lisp_indent(void) } colnr_T firsttry = amount; - init_chartabsize_arg(&cts, curwin, (colnr_T)(that - line), - amount, line, that); - while (ascii_iswhite(*cts.cts_ptr)) { - cts.cts_vcol += lbr_chartabsize(&cts); - cts.cts_ptr++; + while (ascii_iswhite(*that)) { + amount += win_charsize(cstype, amount, that, (uint8_t)(*that), &arg).width; + that++; } - that = cts.cts_ptr; - amount = cts.cts_vcol; - clear_chartabsize_arg(&cts); if (*that && (*that != ';')) { // Not a comment line. @@ -1292,37 +1288,38 @@ int get_lisp_indent(void) parencount = 0; - init_chartabsize_arg(&cts, curwin, - (colnr_T)(that - line), amount, line, that); - if (((*that != '"') && (*that != '\'') && (*that != '#') - && (((uint8_t)(*that) < '0') || ((uint8_t)(*that) > '9')))) { + CharInfo ci = utf_ptr2CharInfo(that); + if (((ci.value != '"') && (ci.value != '\'') && (ci.value != '#') + && ((ci.value < '0') || (ci.value > '9')))) { int quotecount = 0; - while (*cts.cts_ptr - && (!ascii_iswhite(*cts.cts_ptr) || quotecount || parencount)) { - if (*cts.cts_ptr == '"') { + while (*that && (!ascii_iswhite(ci.value) || quotecount || parencount)) { + if (ci.value == '"') { quotecount = !quotecount; } - if (((*cts.cts_ptr == '(') || (*cts.cts_ptr == '[')) && !quotecount) { + if (((ci.value == '(') || (ci.value == '[')) && !quotecount) { parencount++; } - if (((*cts.cts_ptr == ')') || (*cts.cts_ptr == ']')) && !quotecount) { + if (((ci.value == ')') || (ci.value == ']')) && !quotecount) { parencount--; } - if ((*cts.cts_ptr == '\\') && (*(cts.cts_ptr + 1) 
!= NUL)) { - cts.cts_vcol += lbr_chartabsize_adv(&cts); + if ((ci.value == '\\') && (*(that + 1) != NUL)) { + amount += win_charsize(cstype, amount, that, ci.value, &arg).width; + StrCharInfo next_sci = utfc_next((StrCharInfo){ that, ci }); + that = next_sci.ptr; + ci = next_sci.chr; } - cts.cts_vcol += lbr_chartabsize_adv(&cts); + amount += win_charsize(cstype, amount, that, ci.value, &arg).width; + StrCharInfo next_sci = utfc_next((StrCharInfo){ that, ci }); + that = next_sci.ptr; + ci = next_sci.chr; } } - while (ascii_iswhite(*cts.cts_ptr)) { - cts.cts_vcol += lbr_chartabsize(&cts); - cts.cts_ptr++; + while (ascii_iswhite(*that)) { + amount += win_charsize(cstype, amount, that, (uint8_t)(*that), &arg).width; + that++; } - that = cts.cts_ptr; - amount = cts.cts_vcol; - clear_chartabsize_arg(&cts); if (!*that || (*that == ';')) { amount = firsttry; diff --git a/src/nvim/mbyte.h b/src/nvim/mbyte.h index 68acc5075e..be632ee834 100644 --- a/src/nvim/mbyte.h +++ b/src/nvim/mbyte.h @@ -110,3 +110,11 @@ static inline StrCharInfo utfc_next(StrCharInfo cur) next += next_len; } } + +static inline StrCharInfo utf_ptr2StrCharInfo(char *ptr) + REAL_FATTR_NONNULL_ALL REAL_FATTR_ALWAYS_INLINE REAL_FATTR_PURE; + +static inline StrCharInfo utf_ptr2StrCharInfo(char *ptr) +{ + return (StrCharInfo){ .ptr = ptr, .chr = utf_ptr2CharInfo(ptr) }; +} diff --git a/src/nvim/mouse.c b/src/nvim/mouse.c index a6da7dd3c7..86de182aee 100644 --- a/src/nvim/mouse.c +++ b/src/nvim/mouse.c @@ -1755,22 +1755,23 @@ colnr_T vcol2col(win_T *wp, linenr_T lnum, colnr_T vcol, colnr_T *coladdp) { // try to advance to the specified column char *line = ml_get_buf(wp->w_buffer, lnum); - chartabsize_T cts; - init_chartabsize_arg(&cts, wp, lnum, 0, line, line); - while (cts.cts_vcol < vcol && *cts.cts_ptr != NUL) { - int size = win_lbr_chartabsize(&cts, NULL); - if (cts.cts_vcol + size > vcol) { + CharsizeArg arg; + CSType cstype = init_charsize_arg(&arg, wp, lnum, line); + StrCharInfo ci = 
utf_ptr2StrCharInfo(line); + int cur_vcol = 0; + while (cur_vcol < vcol && *ci.ptr != NUL) { + int next_vcol = cur_vcol + win_charsize(cstype, cur_vcol, ci.ptr, ci.chr.value, &arg).width; + if (next_vcol > vcol) { break; } - cts.cts_vcol += size; - MB_PTR_ADV(cts.cts_ptr); + cur_vcol = next_vcol; + ci = utfc_next(ci); } - clear_chartabsize_arg(&cts); if (coladdp != NULL) { - *coladdp = vcol - cts.cts_vcol; + *coladdp = vcol - cur_vcol; } - return (colnr_T)(cts.cts_ptr - line); + return (colnr_T)(ci.ptr - line); } /// Set UI mouse depending on current mode and 'mouse'. diff --git a/src/nvim/ops.c b/src/nvim/ops.c index 2819d98208..e6527773f9 100644 --- a/src/nvim/ops.c +++ b/src/nvim/ops.c @@ -387,17 +387,18 @@ static void shift_block(oparg_T *oap, int amount) } // TODO(vim): is passing bd.textstart for start of the line OK? - chartabsize_T cts; - init_chartabsize_arg(&cts, curwin, curwin->w_cursor.lnum, - bd.start_vcol, bd.textstart, bd.textstart); - while (ascii_iswhite(*cts.cts_ptr)) { - incr = lbr_chartabsize_adv(&cts); + CharsizeArg arg; + CSType cstype = init_charsize_arg(&arg, curwin, curwin->w_cursor.lnum, bd.textstart); + StrCharInfo ci = utf_ptr2StrCharInfo(bd.textstart); + int vcol = bd.start_vcol; + while (ascii_iswhite(ci.chr.value)) { + incr = win_charsize(cstype, vcol, ci.ptr, ci.chr.value, &arg).width; + ci = utfc_next(ci); total += incr; - cts.cts_vcol += incr; + vcol += incr; } - bd.textstart = cts.cts_ptr; - bd.start_vcol = cts.cts_vcol; - clear_chartabsize_arg(&cts); + bd.textstart = ci.ptr; + bd.start_vcol = vcol; int tabs = 0; int spaces = 0; @@ -448,16 +449,13 @@ static void shift_block(oparg_T *oap, int amount) // The character's column is in "bd.start_vcol". 
colnr_T non_white_col = bd.start_vcol; - chartabsize_T cts; - init_chartabsize_arg(&cts, curwin, curwin->w_cursor.lnum, - non_white_col, bd.textstart, non_white); - while (ascii_iswhite(*cts.cts_ptr)) { - incr = lbr_chartabsize_adv(&cts); - cts.cts_vcol += incr; + CharsizeArg arg; + CSType cstype = init_charsize_arg(&arg, curwin, curwin->w_cursor.lnum, bd.textstart); + while (ascii_iswhite(*non_white)) { + incr = win_charsize(cstype, non_white_col, non_white, (uint8_t)(*non_white), &arg).width; + non_white_col += incr; + non_white++; } - non_white_col = cts.cts_vcol; - non_white = cts.cts_ptr; - clear_chartabsize_arg(&cts); const colnr_T block_space_width = non_white_col - oap->start_vcol; // We will shift by "total" or "block_space_width", whichever is less. @@ -478,19 +476,17 @@ static void shift_block(oparg_T *oap, int amount) if (bd.startspaces) { verbatim_copy_width -= bd.start_char_vcols; } - init_chartabsize_arg(&cts, curwin, 0, verbatim_copy_width, - bd.textstart, verbatim_copy_end); - while (cts.cts_vcol < destination_col) { - incr = lbr_chartabsize(&cts); - if (cts.cts_vcol + incr > destination_col) { + cstype = init_charsize_arg(&arg, curwin, 0, bd.textstart); + StrCharInfo ci = utf_ptr2StrCharInfo(verbatim_copy_end); + while (verbatim_copy_width < destination_col) { + incr = win_charsize(cstype, verbatim_copy_width, ci.ptr, ci.chr.value, &arg).width; + if (verbatim_copy_width + incr > destination_col) { break; } - cts.cts_vcol += incr; - MB_PTR_ADV(cts.cts_ptr); + verbatim_copy_width += incr; + ci = utfc_next(ci); } - verbatim_copy_width = cts.cts_vcol; - verbatim_copy_end = cts.cts_ptr; - clear_chartabsize_arg(&cts); + verbatim_copy_end = ci.ptr; // If "destination_col" is different from the width of the initial // part of the line that will be copied, it means we encountered a tab @@ -3250,19 +3246,19 @@ void do_put(int regname, yankreg_T *reg, int dir, int count, int flags) } // get the old line and advance to the position to insert at char *oldp = 
get_cursor_line_ptr(); - size_t oldlen = strlen(oldp); - chartabsize_T cts; - init_chartabsize_arg(&cts, curwin, curwin->w_cursor.lnum, 0, oldp, oldp); - while (cts.cts_vcol < col && *cts.cts_ptr != NUL) { - // Count a tab for what it's worth (if list mode not on) - incr = lbr_chartabsize_adv(&cts); - cts.cts_vcol += incr; + CharsizeArg arg; + CSType cstype = init_charsize_arg(&arg, curwin, curwin->w_cursor.lnum, oldp); + StrCharInfo ci = utf_ptr2StrCharInfo(oldp); + vcol = 0; + while (vcol < col && *ci.ptr != NUL) { + incr = win_charsize(cstype, vcol, ci.ptr, ci.chr.value, &arg).width; + vcol += incr; + ci = utfc_next(ci); } - vcol = cts.cts_vcol; - char *ptr = cts.cts_ptr; + size_t oldlen = (size_t)(ci.ptr - oldp) + strlen(ci.ptr); + char *ptr = ci.ptr; bd.textcol = (colnr_T)(ptr - oldp); - clear_chartabsize_arg(&cts); shortline = (vcol < col) || (vcol == col && !*ptr); @@ -3286,16 +3282,15 @@ void do_put(int regname, yankreg_T *reg, int dir, int count, int flags) yanklen = (int)strlen(y_array[i]); if ((flags & PUT_BLOCK_INNER) == 0) { - // calculate number of spaces required to fill right side of - // block + // calculate number of spaces required to fill right side of block spaces = y_width + 1; - init_chartabsize_arg(&cts, curwin, 0, 0, y_array[i], y_array[i]); - for (int j = 0; j < yanklen; j++) { - spaces -= lbr_chartabsize(&cts); - cts.cts_ptr++; - cts.cts_vcol = 0; + + cstype = init_charsize_arg(&arg, curwin, 0, y_array[i]); + ci = utf_ptr2StrCharInfo(y_array[i]); + while (*ci.ptr != NUL) { + spaces -= win_charsize(cstype, 0, ci.ptr, ci.chr.value, &arg).width; + ci = utfc_next(ci); } - clear_chartabsize_arg(&cts); if (spaces < 0) { spaces = 0; } @@ -4228,25 +4223,25 @@ static void block_prep(oparg_T *oap, struct block_def *bdp, linenr_T lnum, bool char *line = ml_get(lnum); char *prev_pstart = line; - chartabsize_T cts; - init_chartabsize_arg(&cts, curwin, lnum, bdp->start_vcol, line, line); - while (cts.cts_vcol < oap->start_vcol && *cts.cts_ptr != NUL) { 
- // Count a tab for what it's worth (if list mode not on) - incr = lbr_chartabsize(&cts); - cts.cts_vcol += incr; - if (ascii_iswhite(*cts.cts_ptr)) { + CharsizeArg arg; + CSType cstype = init_charsize_arg(&arg, curwin, lnum, line); + StrCharInfo ci = utf_ptr2StrCharInfo(line); + int vcol = bdp->start_vcol; + while (vcol < oap->start_vcol && *ci.ptr != NUL) { + incr = win_charsize(cstype, vcol, ci.ptr, ci.chr.value, &arg).width; + vcol += incr; + if (ascii_iswhite(ci.chr.value)) { bdp->pre_whitesp += incr; bdp->pre_whitesp_c++; } else { bdp->pre_whitesp = 0; bdp->pre_whitesp_c = 0; } - prev_pstart = cts.cts_ptr; - MB_PTR_ADV(cts.cts_ptr); + prev_pstart = ci.ptr; + ci = utfc_next(ci); } - bdp->start_vcol = cts.cts_vcol; - char *pstart = cts.cts_ptr; - clear_chartabsize_arg(&cts); + bdp->start_vcol = vcol; + char *pstart = ci.ptr; bdp->start_char_vcols = incr; if (bdp->start_vcol < oap->start_vcol) { // line too short @@ -4283,17 +4278,18 @@ static void block_prep(oparg_T *oap, struct block_def *bdp, linenr_T lnum, bool } } } else { - init_chartabsize_arg(&cts, curwin, lnum, bdp->end_vcol, line, pend); + cstype = init_charsize_arg(&arg, curwin, lnum, line); + ci = utf_ptr2StrCharInfo(pend); + vcol = bdp->end_vcol; char *prev_pend = pend; - while (cts.cts_vcol <= oap->end_vcol && *cts.cts_ptr != NUL) { - // Count a tab for what it's worth (if list mode not on) - prev_pend = cts.cts_ptr; - incr = lbr_chartabsize_adv(&cts); - cts.cts_vcol += incr; - } - bdp->end_vcol = cts.cts_vcol; - pend = cts.cts_ptr; - clear_chartabsize_arg(&cts); + while (vcol <= oap->end_vcol && *ci.ptr != NUL) { + prev_pend = ci.ptr; + incr = win_charsize(cstype, vcol, ci.ptr, ci.chr.value, &arg).width; + vcol += incr; + ci = utfc_next(ci); + } + bdp->end_vcol = vcol; + pend = ci.ptr; if (bdp->end_vcol <= oap->end_vcol && (!is_del diff --git a/src/nvim/plines.c b/src/nvim/plines.c index 1fa2812dab..d63c032080 100644 --- a/src/nvim/plines.c +++ b/src/nvim/plines.c @@ -51,48 +51,21 @@ int 
win_chartabsize(win_T *wp, char *p, colnr_T col) return ptr2cells(p); } -/// Return the number of characters the string 's' will take on the screen, -/// taking into account the size of a tab. -/// -/// @param s -/// -/// @return Number of characters the string will take on the screen. -int linetabsize_str(char *s) -{ - return linetabsize_col(0, s); -} - -/// Like linetabsize_str(), but "s" starts at column "startcol". +/// Like linetabsize_str(), but "s" starts at virtual column "startvcol". /// /// @param startcol /// @param s /// /// @return Number of characters the string will take on the screen. -int linetabsize_col(int startcol, char *s) +int linetabsize_col(int startvcol, char *s) { - chartabsize_T cts; - init_chartabsize_arg(&cts, curwin, 0, startcol, s, s); - while (*cts.cts_ptr != NUL) { - cts.cts_vcol += lbr_chartabsize_adv(&cts); + CharsizeArg arg; + CSType const cstype = init_charsize_arg(&arg, curwin, 0, s); + if (cstype == kCharsizeFast) { + return linesize_fast(&arg, startvcol, MAXCOL); + } else { + return linesize_regular(&arg, startvcol, MAXCOL); } - clear_chartabsize_arg(&cts); - return cts.cts_vcol; -} - -/// Like linetabsize_str(), but for a given window instead of the current one. -/// -/// @param wp -/// @param line -/// @param len -/// -/// @return Number of characters the string will take on the screen. 
-int win_linetabsize(win_T *wp, linenr_T lnum, char *line, colnr_T len) -{ - chartabsize_T cts; - init_chartabsize_arg(&cts, wp, lnum, 0, line, line); - win_linetabsize_cts(&cts, len); - clear_chartabsize_arg(&cts); - return cts.cts_vcol; } /// Return the number of cells line "lnum" of window "wp" will take on the @@ -102,128 +75,79 @@ int linetabsize(win_T *wp, linenr_T lnum) return win_linetabsize(wp, lnum, ml_get_buf(wp->w_buffer, lnum), (colnr_T)MAXCOL); } -void win_linetabsize_cts(chartabsize_T *cts, colnr_T len) -{ - for (; *cts->cts_ptr != NUL && (len == MAXCOL || cts->cts_ptr < cts->cts_line + len); - MB_PTR_ADV(cts->cts_ptr)) { - cts->cts_vcol += win_lbr_chartabsize(cts, NULL); - } - // check for inline virtual text after the end of the line - if (len == MAXCOL && cts->virt_row >= 0 && *cts->cts_ptr == NUL) { - (void)win_lbr_chartabsize(cts, NULL); - cts->cts_vcol += cts->cts_cur_text_width_left + cts->cts_cur_text_width_right; - } -} - -/// Prepare the structure passed to chartabsize functions. +/// Prepare the structure passed to charsize functions. /// -/// "line" is the start of the line, "ptr" is the first relevant character. +/// "line" is the start of the line. /// When "lnum" is zero do not use inline virtual text. 
-void init_chartabsize_arg(chartabsize_T *cts, win_T *wp, linenr_T lnum, colnr_T col, char *line, - char *ptr) +CSType init_charsize_arg(CharsizeArg *cts, win_T *wp, linenr_T lnum, char *line) { - cts->cts_win = wp; - cts->cts_vcol = col; - cts->cts_line = line; - cts->cts_ptr = ptr; - cts->cts_max_head_vcol = 0; - cts->cts_cur_text_width_left = 0; - cts->cts_cur_text_width_right = 0; + cts->win = wp; + cts->line = line; + cts->max_head_vcol = 0; + cts->cur_text_width_left = 0; + cts->cur_text_width_right = 0; cts->virt_row = -1; cts->indent_width = INT_MIN; + cts->use_tabstop = !wp->w_p_list || wp->w_p_lcs_chars.tab1; if (lnum > 0 && wp->w_buffer->b_virt_text_inline > 0) { - marktree_itr_get(wp->w_buffer->b_marktree, lnum - 1, 0, cts->cts_iter); - MTKey mark = marktree_itr_current(cts->cts_iter); + marktree_itr_get(wp->w_buffer->b_marktree, lnum - 1, 0, cts->iter); + MTKey mark = marktree_itr_current(cts->iter); if (mark.pos.row == lnum - 1) { cts->virt_row = lnum - 1; } } -} - -/// Free any allocated item in "cts". -void clear_chartabsize_arg(chartabsize_T *cts) -{ -} -/// like win_chartabsize(), but also check for line breaks on the screen -/// -/// @param cts -/// -/// @return The number of characters taken up on the screen. -int lbr_chartabsize(chartabsize_T *cts) -{ - if (!curwin->w_p_lbr && *get_showbreak_value(curwin) == NUL - && !curwin->w_p_bri && cts->virt_row < 0) { - if (curwin->w_p_wrap) { - return win_nolbr_chartabsize(cts, NULL); - } - return win_chartabsize(curwin, cts->cts_ptr, cts->cts_vcol); + if (cts->virt_row >= 0 + || (wp->w_p_wrap && (wp->w_p_lbr || wp->w_p_bri || *get_showbreak_value(wp) != NUL))) { + return kCharsizeRegular; + } else { + return kCharsizeFast; } - return win_lbr_chartabsize(cts, NULL); } -/// Call lbr_chartabsize() and advance the pointer. -/// -/// @param cts -/// -/// @return The number of characters take up on the screen. 
-int lbr_chartabsize_adv(chartabsize_T *cts) -{ - int retval = lbr_chartabsize(cts); - MB_PTR_ADV(cts->cts_ptr); - return retval; -} - -/// Get the number of characters taken up on the screen indicated by "cts". -/// "cts->cts_cur_text_width_left" and "cts->cts_cur_text_width_right" are set +/// Get the number of characters taken up on the screen for the given cts and position. +/// "cts->cur_text_width_left" and "cts->cur_text_width_right" are set /// to the extra size for inline virtual text. /// This function is used very often, keep it fast!!!! /// -/// If "headp" not NULL, set "*headp" to the size of 'showbreak'/'breakindent' -/// included in the return value. -/// When "cts->cts_max_head_vcol" is positive, only count in "*headp" the size -/// of 'showbreak'/'breakindent' before "cts->cts_max_head_vcol". -/// When "cts->cts_max_head_vcol" is negative, only count in "*headp" the size +/// When "cts->max_head_vcol" is positive, only count in "head" the size +/// of 'showbreak'/'breakindent' before "cts->max_head_vcol". +/// When "cts->max_head_vcol" is negative, only count in "head" the size /// of 'showbreak'/'breakindent' before where cursor should be placed. -/// -/// Warning: "*headp" may not be set if it's 0, init to 0 before calling. -int win_lbr_chartabsize(chartabsize_T *cts, int *headp) +CharSize charsize_regular(CharsizeArg *cts, char *const cur, colnr_T const vcol, + int32_t const cur_char) { - win_T *wp = cts->cts_win; - char *line = cts->cts_line; // start of the line - char *s = cts->cts_ptr; - colnr_T vcol = cts->cts_vcol; - int mb_added = 0; - - cts->cts_cur_text_width_left = 0; - cts->cts_cur_text_width_right = 0; - - char *const sbr = get_showbreak_value(wp); + cts->cur_text_width_left = 0; + cts->cur_text_width_right = 0; - // No 'linebreak', 'showbreak' and 'breakindent': return quickly. 
- if (!wp->w_p_lbr && !wp->w_p_bri && *sbr == NUL - && cts->virt_row < 0) { - if (wp->w_p_wrap) { - return win_nolbr_chartabsize(cts, headp); - } - return win_chartabsize(wp, s, vcol); - } + win_T *wp = cts->win; + buf_T *buf = wp->w_buffer; + char *line = cts->line; + bool const use_tabstop = cur_char == TAB && cts->use_tabstop; + int mb_added = 0; bool has_lcs_eol = wp->w_p_list && wp->w_p_lcs_chars.eol != NUL; // First get normal size, without 'linebreak' or inline virtual text - int size = win_chartabsize(wp, s, vcol); - if (*s == NUL && !has_lcs_eol) { - size = 0; // NUL is not displayed + int size; + int is_doublewidth = false; + if (use_tabstop) { + size = tabstop_padding(vcol, buf->b_p_ts, buf->b_p_vts_array); + } else if (*cur == NUL && !has_lcs_eol) { + size = 0; + } else if (cur_char < 0) { + size = kInvalidByteCells; + } else { + size = char2cells(cur_char); + is_doublewidth = size == 2 && cur_char > 0x80; } - bool is_doublewidth = size == 2 && MB_BYTE2LEN((uint8_t)(*s)) > 1; if (cts->virt_row >= 0) { int tab_size = size; - int col = (int)(s - line); + int col = (int)(cur - line); while (true) { - MTKey mark = marktree_itr_current(cts->cts_iter); + MTKey mark = marktree_itr_current(cts->iter); if (mark.pos.row != cts->virt_row || mark.pos.col > col) { break; } else if (mark.pos.col == col) { @@ -233,15 +157,15 @@ int win_lbr_chartabsize(chartabsize_T *cts, int *headp) while (vt) { if (!(vt->flags & kVTIsLines) && vt->pos == kVPosInline) { if (mt_right(mark)) { - cts->cts_cur_text_width_right += vt->width; + cts->cur_text_width_right += vt->width; } else { - cts->cts_cur_text_width_left += vt->width; + cts->cur_text_width_left += vt->width; } size += vt->width; - if (*s == TAB) { + if (use_tabstop) { // tab size changes because of the inserted text size -= tab_size; - tab_size = win_chartabsize(wp, s, vcol + size); + tab_size = tabstop_padding(vcol + size, buf->b_p_ts, buf->b_p_vts_array); size += tab_size; } } @@ -249,7 +173,7 @@ int 
win_lbr_chartabsize(chartabsize_T *cts, int *headp) } } } - marktree_itr_next(wp->w_buffer->b_marktree, cts->cts_iter); + marktree_itr_next(wp->w_buffer->b_marktree, cts->iter); } } @@ -259,6 +183,8 @@ int win_lbr_chartabsize(chartabsize_T *cts, int *headp) mb_added = 1; } + char *const sbr = get_showbreak_value(wp); + // May have to add something for 'breakindent' and/or 'showbreak' // string at the start of a screen line. int head = mb_added; @@ -267,7 +193,7 @@ int win_lbr_chartabsize(chartabsize_T *cts, int *headp) int col_off_prev = win_col_off(wp); int width2 = wp->w_width_inner - col_off_prev + win_col_off2(wp); colnr_T wcol = vcol + col_off_prev; - colnr_T max_head_vcol = cts->cts_max_head_vcol; + colnr_T max_head_vcol = cts->max_head_vcol; int added = 0; // cells taken by 'showbreak'/'breakindent' before current char @@ -333,7 +259,7 @@ int win_lbr_chartabsize(chartabsize_T *cts, int *headp) head += (max_head_vcol - (vcol + head_prev + prev_rem) + width2 - 1) / width2 * head_mid; } else if (max_head_vcol < 0) { - int off = virt_text_cursor_off(cts, *s == NUL); + int off = virt_text_cursor_off(cts, *cur == NUL); if (off >= prev_rem) { if (size > off) { head += (1 + (off - prev_rem) / width) * head_mid; @@ -348,19 +274,16 @@ int win_lbr_chartabsize(chartabsize_T *cts, int *headp) size += added; } - if (headp != NULL) { - *headp = head; - } - + char *s = cur; colnr_T vcol_start = 0; // start from where to consider linebreak // If 'linebreak' set check at a blank before a non-blank if the line // needs a break here if (wp->w_p_lbr && wp->w_p_wrap && wp->w_width_inner != 0) { - char *t = cts->cts_line; + char *t = cts->line; while (vim_isbreak((uint8_t)t[0])) { t++; } - vcol_start = (colnr_T)(t - cts->cts_line); + vcol_start = (colnr_T)(t - cts->line); } if (wp->w_p_lbr && vcol_start <= vcol && vim_isbreak((uint8_t)s[0]) @@ -398,39 +321,50 @@ int win_lbr_chartabsize(chartabsize_T *cts, int *headp) } } - return size; + return (CharSize){ .width = size, .head = 
head }; } -/// Like win_lbr_chartabsize(), except that we know 'linebreak' is off and -/// 'wrap' is on. This means we need to check for a double-byte character that -/// doesn't fit at the end of the screen line. +/// Like charsize_regular(), except it doesn't handle virtual text, +/// linebreak, breakindent and showbreak. Handles normal characters, tabs and wrapping. +/// This function is always inlined. /// -/// @param cts -/// @param headp -/// -/// @return The number of characters take up on the screen. -static int win_nolbr_chartabsize(chartabsize_T *cts, int *headp) +/// @see charsize_regular +/// @see charsize_fast +static inline CharSize charsize_fast_impl(win_T *const wp, bool use_tabstop, colnr_T const vcol, + int32_t const cur_char) + FUNC_ATTR_PURE FUNC_ATTR_ALWAYS_INLINE { - win_T *wp = cts->cts_win; - char *s = cts->cts_ptr; - colnr_T col = cts->cts_vcol; - - if ((*s == TAB) && (!wp->w_p_list || wp->w_p_lcs_chars.tab1)) { - return tabstop_padding(col, - wp->w_buffer->b_p_ts, - wp->w_buffer->b_p_vts_array); - } - int n = ptr2cells(s); - - // Add one cell for a double-width character in the last column of the - // window, displayed with a ">". - if ((n == 2) && (MB_BYTE2LEN((uint8_t)(*s)) > 1) && in_win_border(wp, col)) { - if (headp != NULL) { - *headp = 1; + // A tab gets expanded, depending on the current column + if (cur_char == TAB && use_tabstop) { + return (CharSize){ + .width = tabstop_padding(vcol, wp->w_buffer->b_p_ts, + wp->w_buffer->b_p_vts_array) + }; + } else { + int width; + if (cur_char < 0) { + width = kInvalidByteCells; + } else { + width = char2cells(cur_char); + } + + // If a double-width char doesn't fit at the end of a line, it wraps to the next line, + // and the last column displays a '>'. 
+ if (width == 2 && cur_char >= 0x80 && wp->w_p_wrap && in_win_border(wp, vcol)) { + return (CharSize){ .width = 3, .head = 1 }; + } else { + return (CharSize){ .width = width }; } - return 3; } - return n; +} + +/// Like charsize_regular(), except it doesn't handle virtual text, +/// linebreak, breakindent and showbreak. Handles normal characters, tabs and wrapping. +/// Can be used if CSType is kCharsizeFast. +CharSize charsize_fast(CharsizeArg *cts, colnr_T const vcol, int32_t const cur_char) + FUNC_ATTR_PURE +{ + return charsize_fast_impl(cts->win, cts->use_tabstop, vcol, cur_char); } /// Check that virtual column "vcol" is in the rightmost column of window "wp". @@ -461,19 +395,63 @@ static bool in_win_border(win_T *wp, colnr_T vcol) return (vcol - width1) % width2 == width2 - 1; } +/// Calculate virtual column until the given 'len'. +/// +/// @param arg Argument to charsize functions. +/// @param vcol Starting virtual column. +/// @param len First byte of the end character, or MAXCOL. +/// +/// @return virtual column before the character at 'len', +/// or full size of the line if 'len' is MAXCOL. +int linesize_regular(CharsizeArg *const arg, int vcol, colnr_T const len) +{ + char *const line = arg->line; + + StrCharInfo ci = utf_ptr2StrCharInfo(line); + while (ci.ptr - line < len && *ci.ptr != NUL) { + vcol += charsize_regular(arg, ci.ptr, vcol, ci.chr.value).width; + ci = utfc_next(ci); + } + + // Check for inline virtual text after the end of the line. + if (len == MAXCOL && arg->virt_row >= 0) { + (void)charsize_regular(arg, ci.ptr, vcol, ci.chr.value); + vcol += arg->cur_text_width_left + arg->cur_text_width_right; + } + + return vcol; +} + +/// Like linesize_regular(), but can be used when CSType is kCharsizeFast. 
+/// +/// @see linesize_regular +int linesize_fast(CharsizeArg const *const arg, int vcol, colnr_T const len) +{ + win_T *const wp = arg->win; + bool const use_tabstop = arg->use_tabstop; + + char *const line = arg->line; + + StrCharInfo ci = utf_ptr2StrCharInfo(line); + while (ci.ptr - line < len && *ci.ptr != NUL) { + vcol += charsize_fast_impl(wp, use_tabstop, vcol, ci.chr.value).width; + ci = utfc_next(ci); + } + + return vcol; +} + /// Get how many virtual columns inline virtual text should offset the cursor. /// -/// @param cts should contain information stored by win_lbr_chartabsize() -/// about widths of left and right gravity virtual text /// @param on_NUL whether this is the end of the line -static int virt_text_cursor_off(chartabsize_T *cts, bool on_NUL) +static int virt_text_cursor_off(CharsizeArg *cts, bool on_NUL) { int off = 0; if (!on_NUL || !(State & MODE_NORMAL)) { - off += cts->cts_cur_text_width_left; + off += cts->cur_text_width_left; } if (!on_NUL && (State & MODE_NORMAL)) { - off += cts->cts_cur_text_width_right; + off += cts->cur_text_width_right; } return off; } @@ -492,95 +470,53 @@ static int virt_text_cursor_off(chartabsize_T *cts, bool on_NUL) /// @param end void getvcol(win_T *wp, pos_T *pos, colnr_T *start, colnr_T *cursor, colnr_T *end) { - char *ptr; // points to current char - int incr; - int head; - colnr_T *vts = wp->w_buffer->b_p_vts_array; - int ts = (int)wp->w_buffer->b_p_ts; - - colnr_T vcol = 0; - char *line = ptr = ml_get_buf(wp->w_buffer, pos->lnum); // start of the line + char *const line = ml_get_buf(wp->w_buffer, pos->lnum); // start of the line + int const end_col = pos->col; - uintptr_t last_pos = (uintptr_t)(ptr + pos->col); - if (last_pos < (uintptr_t)ptr) { - last_pos = UINTPTR_MAX; // unsigned overflow - } - chartabsize_T cts; + CharsizeArg arg; bool on_NUL = false; - init_chartabsize_arg(&cts, wp, pos->lnum, 0, line, line); - cts.cts_max_head_vcol = -1; - - // This function is used very often, do some speed 
optimizations. - // When 'linebreak', 'showbreak' and 'breakindent' are not set - // and there are no virtual text use a simple loop. - if (!wp->w_p_lbr && !wp->w_p_bri && cts.virt_row < 0 && *get_showbreak_value(wp) == NUL) { - bool const special_tab = !wp->w_p_list || wp->w_p_lcs_chars.tab1 != NUL; - CharInfo cur_char = utf_ptr2CharInfo(ptr); + CSType const cstype = init_charsize_arg(&arg, wp, pos->lnum, line); + arg.max_head_vcol = -1; + + colnr_T vcol = 0; + CharSize char_size; + StrCharInfo ci = utf_ptr2StrCharInfo(line); + if (cstype == kCharsizeFast) { + bool const use_tabstop = arg.use_tabstop; while (true) { - head = 0; - // make sure we don't go past the end of the line - if (cur_char.value == 0 && cur_char.len == 1) { - // NUL at end of line only takes one column - incr = 1; + if (*ci.ptr == NUL) { + // if cursor is at NUL, it is treated like 1 cell char + char_size = (CharSize){ .width = 1 }; break; } - - // A tab gets expanded, depending on the current column - if (cur_char.value == TAB && special_tab) { - incr = tabstop_padding(vcol, ts, vts); - } else { - if (cur_char.value < 0) { - incr = kInvalidByteCells; - } else { - incr = char2cells(cur_char.value); - } - - // If a double-cell char doesn't fit at the end of a line - // it wraps to the next line, it's like this char is three - // cells wide. - if (incr == 2 && cur_char.value >= 0x80 - && wp->w_p_wrap && in_win_border(wp, vcol)) { - incr++; - head = 1; - } - } - - StrCharInfo const next_char = utfc_next((StrCharInfo){ ptr, cur_char }); - if ((uintptr_t)next_char.ptr > last_pos) { + char_size = charsize_fast_impl(wp, use_tabstop, vcol, ci.chr.value); + StrCharInfo const next = utfc_next(ci); + if (next.ptr - line > end_col) { break; } - - cur_char = next_char.chr; - ptr = next_char.ptr; - vcol += incr; + ci = next; + vcol += char_size.width; } } else { while (true) { - // A tab gets expanded, depending on the current column - // Other things also take up space. 
- head = 0; - incr = win_lbr_chartabsize(&cts, &head); - - // make sure we don't go past the end of the line - if (*cts.cts_ptr == NUL) { - // NUL at end of line only takes one column, unless there is virtual text - incr = MAX(1, cts.cts_cur_text_width_left + cts.cts_cur_text_width_right); + char_size = charsize_regular(&arg, ci.ptr, vcol, ci.chr.value); + if (*ci.ptr == NUL) { + // if cursor is at NUL, it is treated like 1 cell char unless there is virtual text + char_size.width = MAX(1, arg.cur_text_width_left + arg.cur_text_width_right); on_NUL = true; break; } - - char *const next = cts.cts_ptr + utfc_ptr2len(cts.cts_ptr); - if ((uintptr_t)next > last_pos) { + StrCharInfo const next = utfc_next(ci); + if (next.ptr - line > end_col) { break; } - - cts.cts_ptr = next; - cts.cts_vcol += incr; + ci = next; + vcol += char_size.width; } - vcol = cts.cts_vcol; - ptr = cts.cts_ptr; } - clear_chartabsize_arg(&cts); + + int head = char_size.head; + int incr = char_size.width; if (start != NULL) { *start = vcol + head; @@ -591,7 +527,7 @@ void getvcol(win_T *wp, pos_T *pos, colnr_T *start, colnr_T *cursor, colnr_T *en } if (cursor != NULL) { - if ((*ptr == TAB) + if (ci.chr.value == TAB && (State & MODE_NORMAL) && !wp->w_p_list && !virtual_active() @@ -599,7 +535,7 @@ void getvcol(win_T *wp, pos_T *pos, colnr_T *start, colnr_T *cursor, colnr_T *en // cursor at end *cursor = vcol + incr - 1; } else { - vcol += virt_text_cursor_off(&cts, on_NUL); + vcol += virt_text_cursor_off(&arg, on_NUL); // cursor at start *cursor = vcol + head; } @@ -788,14 +724,18 @@ int plines_win_nofill(win_T *wp, linenr_T lnum, bool limit_winheight) int plines_win_nofold(win_T *wp, linenr_T lnum) { char *s = ml_get_buf(wp->w_buffer, lnum); - chartabsize_T cts; - init_chartabsize_arg(&cts, wp, lnum, 0, s, s); - if (*s == NUL && cts.virt_row < 0) { + CharsizeArg arg; + CSType const cstype = init_charsize_arg(&arg, wp, lnum, s); + if (*s == NUL && arg.virt_row < 0) { return 1; // be quick for an empty 
line } - win_linetabsize_cts(&cts, (colnr_T)MAXCOL); - clear_chartabsize_arg(&cts); - int64_t col = cts.cts_vcol; + + int64_t col; + if (cstype == kCharsizeFast) { + col = linesize_fast(&arg, 0, MAXCOL); + } else { + col = linesize_regular(&arg, 0, MAXCOL); + } // If list mode is on, then the '$' at the end of the line may take up one // extra column. @@ -834,26 +774,33 @@ int plines_win_col(win_T *wp, linenr_T lnum, long column) char *line = ml_get_buf(wp->w_buffer, lnum); - colnr_T col = 0; - chartabsize_T cts; + CharsizeArg cts; + CSType const cstype = init_charsize_arg(&cts, wp, lnum, line); - init_chartabsize_arg(&cts, wp, lnum, 0, line, line); - while (*cts.cts_ptr != NUL && --column >= 0) { - cts.cts_vcol += win_lbr_chartabsize(&cts, NULL); - MB_PTR_ADV(cts.cts_ptr); + colnr_T vcol = 0; + StrCharInfo ci = utf_ptr2StrCharInfo(line); + if (cstype == kCharsizeFast) { + bool const use_tabstop = cts.use_tabstop; + while (*ci.ptr != NUL && --column >= 0) { + vcol += charsize_fast_impl(wp, use_tabstop, vcol, ci.chr.value).width; + ci = utfc_next(ci); + } + } else { + while (*ci.ptr != NUL && --column >= 0) { + vcol += charsize_regular(&cts, ci.ptr, vcol, ci.chr.value).width; + ci = utfc_next(ci); + } } - // If *cts.cts_ptr is a TAB, and the TAB is not displayed as ^I, and we're not + // If current char is a TAB, and the TAB is not displayed as ^I, and we're not // in MODE_INSERT state, then col must be adjusted so that it represents the // last screen position of the TAB. This only fixes an error when the TAB // wraps from one screen line to the next (when 'columns' is not a multiple // of 'ts') -- webb. 
- col = cts.cts_vcol; - if (*cts.cts_ptr == TAB && (State & MODE_NORMAL) - && (!wp->w_p_list || wp->w_p_lcs_chars.tab1)) { - col += win_lbr_chartabsize(&cts, NULL) - 1; + colnr_T col = vcol; + if (ci.chr.value == TAB && (State & MODE_NORMAL) && cts.use_tabstop) { + col += win_charsize(cstype, col, ci.ptr, ci.chr.value, &cts).width - 1; } - clear_chartabsize_arg(&cts); // Add column offset for 'number', 'relativenumber', 'foldcolumn', etc. int width = wp->w_width_inner - win_col_off(wp); diff --git a/src/nvim/plines.h b/src/nvim/plines.h index 86ee7ef53c..4611101041 100644 --- a/src/nvim/plines.h +++ b/src/nvim/plines.h @@ -4,26 +4,96 @@ #include // IWYU pragma: keep #include "nvim/marktree_defs.h" +#include "nvim/mbyte_defs.h" #include "nvim/pos_defs.h" // IWYU pragma: keep #include "nvim/types_defs.h" -/// Argument for lbr_chartabsize(). +typedef bool CSType; + +enum { + kCharsizeRegular, + kCharsizeFast, +}; + +/// Argument for char size functions. typedef struct { - win_T *cts_win; - char *cts_line; ///< start of the line - char *cts_ptr; ///< current position in line - int cts_vcol; ///< virtual column at current position - int indent_width; ///< width of showbreak and breakindent on wrapped lines - /// INT_MIN if not yet calculated + win_T *win; + char *line; ///< start of the line + + bool use_tabstop; ///< use tabstop for tab instead of counting it as ^I + int indent_width; ///< width of showbreak and breakindent on wrapped lines + /// INT_MIN if not yet calculated - int virt_row; ///< line number, -1 if no virtual text - int cts_cur_text_width_left; ///< width of virtual text left of cursor - int cts_cur_text_width_right; ///< width of virtual text right of cursor + int virt_row; ///< line number, -1 if no virtual text + int cur_text_width_left; ///< width of virtual text left of cursor + int cur_text_width_right; ///< width of virtual text right of cursor - int cts_max_head_vcol; ///< see win_lbr_chartabsize() - MarkTreeIter cts_iter[1]; -} chartabsize_T; + 
int max_head_vcol; ///< see charsize_regular() + MarkTreeIter iter[1]; +} CharsizeArg; + +typedef struct { + int width; + int head; // size of breakindent etc. before the character (included in width) +} CharSize; #ifdef INCLUDE_GENERATED_DECLARATIONS # include "plines.h.generated.h" #endif + +static inline CharSize win_charsize(CSType cstype, int vcol, char *ptr, int32_t chr, + CharsizeArg *arg) + REAL_FATTR_NONNULL_ALL REAL_FATTR_WARN_UNUSED_RESULT REAL_FATTR_ALWAYS_INLINE; + +/// Get the number of cells taken up on the screen by the given character at vcol. +/// "arg->cur_text_width_left" and "arg->cur_text_width_right" are set +/// to the extra size for inline virtual text. +/// +/// When "arg->max_head_vcol" is positive, only count in "head" the size +/// of 'showbreak'/'breakindent' before "arg->max_head_vcol". +/// When "arg->max_head_vcol" is negative, only count in "head" the size +/// of 'showbreak'/'breakindent' before where cursor should be placed. +static inline CharSize win_charsize(CSType cstype, int vcol, char *ptr, int32_t chr, + CharsizeArg *arg) +{ + if (cstype == kCharsizeFast) { + return charsize_fast(arg, vcol, chr); + } else { + return charsize_regular(arg, ptr, vcol, chr); + } +} + +static inline int linetabsize_str(char *s) + REAL_FATTR_NONNULL_ALL REAL_FATTR_WARN_UNUSED_RESULT REAL_FATTR_ALWAYS_INLINE; + +/// Return the number of characters the string 's' will take on the screen, +/// taking into account the size of a tab. +/// +/// @param s +/// +/// @return Number of characters the string will take on the screen. +static inline int linetabsize_str(char *s) +{ + return linetabsize_col(0, s); +} + +static inline int win_linetabsize(win_T *wp, linenr_T lnum, char *line, colnr_T len) + REAL_FATTR_NONNULL_ALL REAL_FATTR_WARN_UNUSED_RESULT REAL_FATTR_ALWAYS_INLINE; + +/// Like linetabsize_str(), but for a given window instead of the current one. 
+/// +/// @param wp +/// @param line +/// @param len +/// +/// @return Number of characters the string will take on the screen. +static inline int win_linetabsize(win_T *wp, linenr_T lnum, char *line, colnr_T len) +{ + CharsizeArg arg; + CSType const cstype = init_charsize_arg(&arg, wp, lnum, line); + if (cstype == kCharsizeFast) { + return linesize_fast(&arg, 0, len); + } else { + return linesize_regular(&arg, 0, len); + } +} -- cgit