diff options
author | VanaIgr <vanaigranov@gmail.com> | 2024-02-07 01:03:45 -0600 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-02-07 15:03:45 +0800 |
commit | cca8a78ea2ac0803d9e97ee761db9d3e31a77aeb (patch) | |
tree | 38ab066d1aab1b0ece59edb0c193ab5cd3d36877 /src | |
parent | 6725565258930ba430cfb925fd1671596a8a4342 (diff) | |
download | rneovim-cca8a78ea2ac0803d9e97ee761db9d3e31a77aeb.tar.gz rneovim-cca8a78ea2ac0803d9e97ee761db9d3e31a77aeb.tar.bz2 rneovim-cca8a78ea2ac0803d9e97ee761db9d3e31a77aeb.zip |
perf: improve utf_char2cells() performance (#27353)
`utf_char2cells()` calls `utf_printable()` twice (sometimes indirectly,
through `vim_isprintc()`) for characters >= 128. The function can be
refactored to call it only once.
`utf_printable()` uses binary search on ranges of unprintable characters
to determine if a given character is printable. Since there are only 9
ranges, and the first range contains only one character, binary search
can be replaced with SSE2 SIMD comparisons that check 8 ranges at a
time, and the first range is checked separately. SSE2 is enabled by
default in GCC, Clang and MSVC for x86-64.
Add 3-byte utf-8 to screenpos_spec benchmarks.
Diffstat (limited to 'src')
-rw-r--r-- | src/nvim/mbyte.c | 99 |
1 files changed, 71 insertions, 28 deletions
diff --git a/src/nvim/mbyte.c b/src/nvim/mbyte.c index c0ba1d9e9b..8583b236c7 100644 --- a/src/nvim/mbyte.c +++ b/src/nvim/mbyte.c @@ -445,24 +445,26 @@ int mb_get_class_tab(const char *p, const uint64_t *const chartab) static bool intable(const struct interval *table, size_t n_items, int c) FUNC_ATTR_PURE { + assert(n_items > 0); // first quick check for Latin1 etc. characters if (c < table[0].first) { return false; } + assert(n_items <= SIZE_MAX / 2); // binary search in table - int bot = 0; - int top = (int)(n_items - 1); - while (top >= bot) { - int mid = (bot + top) / 2; + size_t bot = 0; + size_t top = n_items; + do { + size_t mid = (bot + top) >> 1; if (table[mid].last < c) { bot = mid + 1; } else if (table[mid].first > c) { - top = mid - 1; + top = mid; } else { return true; } - } + } while (top > bot); return false; } @@ -476,32 +478,28 @@ static bool intable(const struct interval *table, size_t n_items, int c) /// gen_unicode_tables.lua, which must be manually invoked as needed. int utf_char2cells(int c) { - // Use the value from setcellwidths() at 0x80 and higher, unless the - // character is not printable. - if (c >= 0x80 && vim_isprintc(c)) { - int n = cw_value(c); - if (n != 0) { - return n; - } + if (c < 0x80) { + return 1; } - if (c >= 0x100) { - if (!utf_printable(c)) { - return 6; // unprintable, displays <xxxx> - } - if (intable(doublewidth, ARRAY_SIZE(doublewidth), c)) { - return 2; - } - if (p_emoji && intable(emoji_wide, ARRAY_SIZE(emoji_wide), c)) { - return 2; - } - } else if (c >= 0x80 && !vim_isprintc(c)) { - // Characters below 0x100 are influenced by 'isprint' option. - return 4; // unprintable, displays <xx> + if (!vim_isprintc(c)) { + assert(c <= 0xFFFF); + // unprintable is displayed either as <xx> or <xxxx> + return c > 0xFF ? 
6 : 4; + } + + int n = cw_value(c); + if (n != 0) { + return n; } - if (c >= 0x80 && *p_ambw == 'd' - && intable(ambiguous, ARRAY_SIZE(ambiguous), c)) { + if (intable(doublewidth, ARRAY_SIZE(doublewidth), c)) { + return 2; + } + if (p_emoji && intable(emoji_wide, ARRAY_SIZE(emoji_wide), c)) { + return 2; + } + if (*p_ambw == 'd' && intable(ambiguous, ARRAY_SIZE(ambiguous), c)) { return 2; } @@ -1092,9 +1090,52 @@ bool utf_iscomposing(int c) return intable(combining, ARRAY_SIZE(combining), c); } +#ifdef __SSE2__ + +# include <emmintrin.h> + // Return true for characters that can be displayed in a normal way. // Only for characters of 0x100 and above! bool utf_printable(int c) + FUNC_ATTR_CONST +{ + if (c < 0x180B || c > 0xFFFF) { + return c != 0x70F; + } + +# define L(v) ((int16_t)((v) - 1)) // lower bound (exclusive) +# define H(v) ((int16_t)(v)) // upper bound (inclusive) + + // Boundaries of unprintable characters. + // Some values are negative when converted to int16_t. + // Ranges must not wrap around when converted to int16_t. + __m128i const lo = _mm_setr_epi16(L(0x180b), L(0x200b), L(0x202a), L(0x2060), + L(0xd800), L(0xfeff), L(0xfff9), L(0xfffe)); + + __m128i const hi = _mm_setr_epi16(H(0x180e), H(0x200f), H(0x202e), H(0x206f), + H(0xdfff), H(0xfeff), H(0xfffb), H(0xffff)); + +# undef L +# undef H + + __m128i value = _mm_set1_epi16((int16_t)c); + + // Using _mm_cmplt_epi16() is less optimal, since it would require + // swapping operands (sse2 only has cmpgt instruction), + // and only the second operand can be a memory location. + + // Character is printable when it is above/below both bounds of each range + // (corresponding bits in both masks are equal). + return _mm_movemask_epi8(_mm_cmpgt_epi16(value, lo)) + == _mm_movemask_epi8(_mm_cmpgt_epi16(value, hi)); +} + +#else + +// Return true for characters that can be displayed in a normal way. +// Only for characters of 0x100 and above! 
+bool utf_printable(int c) + FUNC_ATTR_PURE { // Sorted list of non-overlapping intervals. // 0xd800-0xdfff is reserved for UTF-16, actually illegal. @@ -1107,6 +1148,8 @@ bool utf_printable(int c) return !intable(nonprint, ARRAY_SIZE(nonprint), c); } +#endif + // Get class of a Unicode character. // 0: white space // 1: punctuation |