aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author VanaIgr <vanaigranov@gmail.com> 2024-02-07 01:03:45 -0600
committer GitHub <noreply@github.com> 2024-02-07 15:03:45 +0800
commit cca8a78ea2ac0803d9e97ee761db9d3e31a77aeb (patch)
tree 38ab066d1aab1b0ece59edb0c193ab5cd3d36877 /src
parent 6725565258930ba430cfb925fd1671596a8a4342 (diff)
download rneovim-cca8a78ea2ac0803d9e97ee761db9d3e31a77aeb.tar.gz
rneovim-cca8a78ea2ac0803d9e97ee761db9d3e31a77aeb.tar.bz2
rneovim-cca8a78ea2ac0803d9e97ee761db9d3e31a77aeb.zip
perf: improve utf_char2cells() performance (#27353)
`utf_char2cells()` calls `utf_printable()` twice (sometimes indirectly, through `vim_isprintc()`) for characters >= 128. The function can be refactored to call it only once. `utf_printable()` uses binary search on ranges of unprintable characters to determine whether a given character is printable. Since there are only 9 ranges, and the first range contains only one character, the binary search can be replaced with SSE2 SIMD comparisons that check 8 ranges at a time, with the first range checked separately. SSE2 is enabled by default in GCC, Clang and MSVC for x86-64. Also add 3-byte UTF-8 to the screenpos_spec benchmarks.
Diffstat (limited to 'src')
-rw-r--r-- src/nvim/mbyte.c 99
1 files changed, 71 insertions, 28 deletions
diff --git a/src/nvim/mbyte.c b/src/nvim/mbyte.c
index c0ba1d9e9b..8583b236c7 100644
--- a/src/nvim/mbyte.c
+++ b/src/nvim/mbyte.c
@@ -445,24 +445,26 @@ int mb_get_class_tab(const char *p, const uint64_t *const chartab)
static bool intable(const struct interval *table, size_t n_items, int c)
FUNC_ATTR_PURE
{
+ assert(n_items > 0);
// first quick check for Latin1 etc. characters
if (c < table[0].first) {
return false;
}
+ assert(n_items <= SIZE_MAX / 2);
// binary search in table
- int bot = 0;
- int top = (int)(n_items - 1);
- while (top >= bot) {
- int mid = (bot + top) / 2;
+ size_t bot = 0;
+ size_t top = n_items;
+ do {
+ size_t mid = (bot + top) >> 1;
if (table[mid].last < c) {
bot = mid + 1;
} else if (table[mid].first > c) {
- top = mid - 1;
+ top = mid;
} else {
return true;
}
- }
+ } while (top > bot);
return false;
}
@@ -476,32 +478,28 @@ static bool intable(const struct interval *table, size_t n_items, int c)
/// gen_unicode_tables.lua, which must be manually invoked as needed.
int utf_char2cells(int c)
{
- // Use the value from setcellwidths() at 0x80 and higher, unless the
- // character is not printable.
- if (c >= 0x80 && vim_isprintc(c)) {
- int n = cw_value(c);
- if (n != 0) {
- return n;
- }
+ if (c < 0x80) {
+ return 1;
}
- if (c >= 0x100) {
- if (!utf_printable(c)) {
- return 6; // unprintable, displays <xxxx>
- }
- if (intable(doublewidth, ARRAY_SIZE(doublewidth), c)) {
- return 2;
- }
- if (p_emoji && intable(emoji_wide, ARRAY_SIZE(emoji_wide), c)) {
- return 2;
- }
- } else if (c >= 0x80 && !vim_isprintc(c)) {
- // Characters below 0x100 are influenced by 'isprint' option.
- return 4; // unprintable, displays <xx>
+ if (!vim_isprintc(c)) {
+ assert(c <= 0xFFFF);
+ // unprintable is displayed either as <xx> or <xxxx>
+ return c > 0xFF ? 6 : 4;
+ }
+
+ int n = cw_value(c);
+ if (n != 0) {
+ return n;
}
- if (c >= 0x80 && *p_ambw == 'd'
- && intable(ambiguous, ARRAY_SIZE(ambiguous), c)) {
+ if (intable(doublewidth, ARRAY_SIZE(doublewidth), c)) {
+ return 2;
+ }
+ if (p_emoji && intable(emoji_wide, ARRAY_SIZE(emoji_wide), c)) {
+ return 2;
+ }
+ if (*p_ambw == 'd' && intable(ambiguous, ARRAY_SIZE(ambiguous), c)) {
return 2;
}
@@ -1092,9 +1090,52 @@ bool utf_iscomposing(int c)
return intable(combining, ARRAY_SIZE(combining), c);
}
+#ifdef __SSE2__
+
+# include <emmintrin.h>
+
// Return true for characters that can be displayed in a normal way.
// Only for characters of 0x100 and above!
bool utf_printable(int c)
+ FUNC_ATTR_CONST
+{
+ if (c < 0x180B || c > 0xFFFF) {
+ return c != 0x70F;
+ }
+
+# define L(v) ((int16_t)((v) - 1)) // lower bound (exclusive)
+# define H(v) ((int16_t)(v)) // upper bound (inclusive)
+
+ // Boundaries of unprintable characters.
+ // Some values are negative when converted to int16_t.
+ // Ranges must not wrap around when converted to int16_t.
+ __m128i const lo = _mm_setr_epi16(L(0x180b), L(0x200b), L(0x202a), L(0x2060),
+ L(0xd800), L(0xfeff), L(0xfff9), L(0xfffe));
+
+ __m128i const hi = _mm_setr_epi16(H(0x180e), H(0x200f), H(0x202e), H(0x206f),
+ H(0xdfff), H(0xfeff), H(0xfffb), H(0xffff));
+
+# undef L
+# undef H
+
+ __m128i value = _mm_set1_epi16((int16_t)c);
+
+ // Using _mm_cmplt_epi16() is less optimal, since it would require
+ // swapping operands (sse2 only has cmpgt instruction),
+ // and only the second operand can be a memory location.
+
+ // Character is printable when it is above/below both bounds of each range
+ // (corresponding bits in both masks are equal).
+ return _mm_movemask_epi8(_mm_cmpgt_epi16(value, lo))
+ == _mm_movemask_epi8(_mm_cmpgt_epi16(value, hi));
+}
+
+#else
+
+// Return true for characters that can be displayed in a normal way.
+// Only for characters of 0x100 and above!
+bool utf_printable(int c)
+ FUNC_ATTR_PURE
{
// Sorted list of non-overlapping intervals.
// 0xd800-0xdfff is reserved for UTF-16, actually illegal.
@@ -1107,6 +1148,8 @@ bool utf_printable(int c)
return !intable(nonprint, ARRAY_SIZE(nonprint), c);
}
+#endif
+
// Get class of a Unicode character.
// 0: white space
// 1: punctuation