diff options
author | bfredl <bjorn.linse@gmail.com> | 2024-08-08 10:42:08 +0200 |
---|---|---|
committer | bfredl <bjorn.linse@gmail.com> | 2024-08-30 11:49:09 +0200 |
commit | cfdf68a7acde16597fbd896674af68c42361102c (patch) | |
tree | 6113193fda7a7c0f94577a464e39964e74311583 /src/nvim/grid.c | |
parent | 4353996d0fa8e5872a334d68196d8088391960cf (diff) | |
download | rneovim-cfdf68a7acde16597fbd896674af68c42361102c.tar.gz rneovim-cfdf68a7acde16597fbd896674af68c42361102c.tar.bz2 rneovim-cfdf68a7acde16597fbd896674af68c42361102c.zip |
feat(mbyte): support extended grapheme clusters including more emoji
Use the grapheme break algorithm from utf8proc to support grapheme
clusters from recent unicode versions.
Handle variant selector VS16 turning some codepoints into double-width
emoji. This means we need to use ptr2cells rather than char2cells when
possible.
Diffstat (limited to 'src/nvim/grid.c')
-rw-r--r-- | src/nvim/grid.c | 37 |
1 files changed, 30 insertions, 7 deletions
diff --git a/src/nvim/grid.c b/src/nvim/grid.c index 56246bf001..acb336c725 100644 --- a/src/nvim/grid.c +++ b/src/nvim/grid.c @@ -186,6 +186,24 @@ size_t schar_len(schar_T sc) } } +int schar_cells(schar_T sc) +{ + // hot path +#ifdef ORDER_BIG_ENDIAN + if (!(sc & 0x80FFFFFF)) { + return 1; + } +#else + if (sc < 0x80) { + return 1; + } +#endif + + char sc_buf[MAX_SCHAR_SIZE]; + schar_get(sc_buf, sc); + return utf_ptr2cells(sc_buf); +} + /// gets first raw UTF-8 byte of an schar static char schar_get_first_byte(schar_T sc) { @@ -428,14 +446,19 @@ int grid_line_puts(int col, const char *text, int textlen, int attr) const int max_col = grid_line_maxcol; while (col < max_col && (len < 0 || (int)(ptr - text) < len) && *ptr != NUL) { // check if this is the first byte of a multibyte - int mbyte_blen = len > 0 - ? utfc_ptr2len_len(ptr, (int)((text + len) - ptr)) - : utfc_ptr2len(ptr); + int mbyte_blen; + if (len >= 0) { + int maxlen = (int)((text + len) - ptr); + mbyte_blen = utfc_ptr2len_len(ptr, maxlen); + if (mbyte_blen > maxlen) { + mbyte_blen = 1; + } + } else { + mbyte_blen = utfc_ptr2len(ptr); + } int firstc; - schar_T schar = len >= 0 - ? utfc_ptr2schar_len(ptr, (int)((text + len) - ptr), &firstc) - : utfc_ptr2schar(ptr, &firstc); - int mbyte_cells = utf_char2cells(firstc); + schar_T schar = utfc_ptrlen2schar(ptr, mbyte_blen, &firstc); + int mbyte_cells = utf_ptr2cells_len(ptr, mbyte_blen); if (mbyte_cells > 2 || schar == 0) { mbyte_cells = 1; schar = schar_from_char(0xFFFD); |