diff options
Diffstat (limited to 'src/nvim/mbyte.c')
-rw-r--r-- | src/nvim/mbyte.c | 464 |
1 files changed, 304 insertions, 160 deletions
diff --git a/src/nvim/mbyte.c b/src/nvim/mbyte.c index a345795bbe..01e720283e 100644 --- a/src/nvim/mbyte.c +++ b/src/nvim/mbyte.c @@ -32,6 +32,7 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <utf8proc.h> #include <uv.h> #include <wctype.h> @@ -43,6 +44,7 @@ #include "nvim/cmdexpand_defs.h" #include "nvim/cursor.h" #include "nvim/drawscreen.h" +#include "nvim/errors.h" #include "nvim/eval/typval.h" #include "nvim/eval/typval_defs.h" #include "nvim/getchar.h" @@ -83,7 +85,6 @@ struct interval { // uncrustify:off #ifdef INCLUDE_GENERATED_DECLARATIONS # include "mbyte.c.generated.h" -# include "unicode_tables.generated.h" #endif // uncrustify:on @@ -442,31 +443,10 @@ int mb_get_class_tab(const char *p, const uint64_t *const chartab) return utf_class_tab(utf_ptr2char(p), chartab); } -// Return true if "c" is in "table". -static bool intable(const struct interval *table, size_t n_items, int c) - FUNC_ATTR_PURE +static bool prop_is_emojilike(const utf8proc_property_t *prop) { - assert(n_items > 0); - // first quick check for Latin1 etc. characters - if (c < table[0].first) { - return false; - } - - assert(n_items <= SIZE_MAX / 2); - // binary search in table - size_t bot = 0; - size_t top = n_items; - do { - size_t mid = (bot + top) >> 1; - if (table[mid].last < c) { - bot = mid + 1; - } else if (table[mid].first > c) { - top = mid; - } else { - return true; - } - } while (top > bot); - return false; + return prop->boundclass == UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC + || prop->boundclass == UTF8PROC_BOUNDCLASS_REGIONAL_INDICATOR; } /// For UTF-8 character "c" return 2 for a double-width character, 1 for others. @@ -494,13 +474,18 @@ int utf_char2cells(int c) return n; } - if (intable(doublewidth, ARRAY_SIZE(doublewidth), c)) { + const utf8proc_property_t *prop = utf8proc_get_property(c); + + if (prop->charwidth == 2) { return 2; } - if (p_emoji && intable(emoji_wide, ARRAY_SIZE(emoji_wide), c)) { + if (*p_ambw == 'd' && prop->ambiguous_width) { return 2; } - if (*p_ambw == 'd' && intable(ambiguous, ARRAY_SIZE(ambiguous), c)) { + + // Characters below 1F000 may be considered single width traditionally, + // making them double width causes problems. + if (p_emoji && c >= 0x1f000 && !prop->ambiguous_width && prop_is_emojilike(prop)) { return 2; } @@ -509,31 +494,43 @@ int utf_char2cells(int c) /// Return the number of display cells character at "*p" occupies. /// This doesn't take care of unprintable characters, use ptr2cells() for that. -int utf_ptr2cells(const char *p) +int utf_ptr2cells(const char *p_in) { + const uint8_t *p = (const uint8_t *)p_in; // Need to convert to a character number. - if ((uint8_t)(*p) >= 0x80) { - int c = utf_ptr2char(p); + if ((*p) >= 0x80) { + int len = utf8len_tab[*p]; + int32_t c = utf_ptr2CharInfo_impl(p, (uintptr_t)len); // An illegal byte is displayed as <xx>. - if (utf_ptr2len(p) == 1 || c == NUL) { + if (c <= 0) { return 4; } // If the char is ASCII it must be an overlong sequence. if (c < 0x80) { return char2cells(c); } - return utf_char2cells(c); + int cells = utf_char2cells(c); + if (cells == 1 && p_emoji + && prop_is_emojilike(utf8proc_get_property(c))) { + int c2 = utf_ptr2char(p_in + len); + if (c2 == 0xFE0F) { + return 2; // emoji presentation + } + } + return cells; } return 1; } /// Convert a UTF-8 byte sequence to a character number. -/// Doesn't handle ascii! only multibyte and illegal sequences. +/// Doesn't handle ascii! only multibyte and illegal sequences. ASCII (including NUL) +/// are treated like illegal sequences. /// /// @param[in] p String to convert. /// @param[in] len Length of the character in bytes, 0 or 1 if illegal. /// -/// @return Unicode codepoint. A negative value when the sequence is illegal. +/// @return Unicode codepoint. A negative value when the sequence is illegal (or +/// ASCII, including NUL). int32_t utf_ptr2CharInfo_impl(uint8_t const *p, uintptr_t const len) FUNC_ATTR_PURE FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT { @@ -601,7 +598,8 @@ int utf_ptr2cells_len(const char *p, int size) { // Need to convert to a wide character. if (size > 0 && (uint8_t)(*p) >= 0x80) { - if (utf_ptr2len_len(p, size) < utf8len_tab[(uint8_t)(*p)]) { + int len = utf_ptr2len_len(p, size); + if (len < utf8len_tab[(uint8_t)(*p)]) { return 1; // truncated } int c = utf_ptr2char(p); @@ -613,7 +611,16 @@ int utf_ptr2cells_len(const char *p, int size) if (c < 0x80) { return char2cells(c); } - return utf_char2cells(c); + int cells = utf_char2cells(c); + if (cells == 1 && p_emoji && size > len + && prop_is_emojilike(utf8proc_get_property(c)) + && utf_ptr2len_len(p + len, size - len) == utf8len_tab[(uint8_t)p[len]]) { + int c2 = utf_ptr2char(p + len); + if (c2 == 0xFE0F) { + return 2; // emoji presentation + } + } + return cells; } return 1; } @@ -646,8 +653,8 @@ size_t mb_string2cells_len(const char *str, size_t size) size_t clen = 0; for (const char *p = str; *p != NUL && p < str + size; - p += utfc_ptr2len_len(p, (int)size + (int)(p - str))) { - clen += (size_t)utf_ptr2cells(p); + p += utfc_ptr2len_len(p, (int)size - (int)(p - str))) { + clen += (size_t)utf_ptr2cells_len(p, (int)size - (int)(p - str)); } return clen; @@ -791,29 +798,48 @@ int mb_cptr2char_adv(const char **pp) return c; } +/// When "c" is the first char of a string, determine if it needs to be prefixed +/// by a space byte to be drawn correctly, and not merge with the space left of +/// the string. +bool utf_iscomposing_first(int c) +{ + return c >= 128 && !utf8proc_grapheme_break(' ', c); +} + /// Check if the character pointed to by "p2" is a composing character when it -/// comes after "p1". For Arabic sometimes "ab" is replaced with "c", which -/// behaves like a composing character. -bool utf_composinglike(const char *p1, const char *p2) +/// comes after "p1". +/// +/// We use the definition in UAX#29 as implemented by utf8proc with the following +/// exceptions: +/// +/// - ASCII chars always begin a new cluster. This is a long assumed invariant +/// in the code base and very useful for performance (we can exit early for ASCII +/// all over the place, branch predictor go brrr in ASCII-only text). +/// As of Unicode 15.1 this will only break BOUNDCLASS_UREPEND followed by ASCII, +/// which should be exceedingly rare (these PREPEND chars are expected to be +/// followed by multibyte chars within the same script family) +/// +/// - When 'arabicshape' is active, some pairs of arabic letters "ab" is replaced with +/// "c" taking one single cell, which behaves like a cluster. +/// +/// @param "state" should be set to GRAPHEME_STATE_INIT before first call +/// it is allowed to be null, but will then not handle some longer +/// sequences, like ZWJ based emoji +bool utf_composinglike(const char *p1, const char *p2, GraphemeState *state) + FUNC_ATTR_NONNULL_ARG(1, 2) { - int c2 = utf_ptr2char(p2); - if (utf_iscomposing(c2)) { - return true; - } - if (!arabic_maycombine(c2)) { + if ((uint8_t)(*p2) < 128) { return false; } - return arabic_combine(utf_ptr2char(p1), c2); -} -/// Check if the next character is a composing character when it -/// comes after the first. For Arabic sometimes "ab" is replaced with "c", which -/// behaves like a composing character. -/// returns false for negative values -bool utf_char_composinglike(int32_t const first, int32_t const next) - FUNC_ATTR_PURE -{ - return utf_iscomposing(next) || arabic_combine(first, next); + int first = utf_ptr2char(p1); + int second = utf_ptr2char(p2); + + if (!utf8proc_grapheme_break_stateful(first, second, state)) { + return true; + } + + return arabic_combine(first, second); } /// Get the screen char at the beginning of a string @@ -832,7 +858,7 @@ schar_T utfc_ptr2schar(const char *p, int *firstc) { int c = utf_ptr2char(p); *firstc = c; // NOT optional, you are gonna need it - bool first_compose = utf_iscomposing(c); + bool first_compose = utf_iscomposing_first(c); size_t maxlen = MAX_SCHAR_SIZE - 1 - first_compose; size_t len = (size_t)utfc_ptr2len_len(p, (int)maxlen); @@ -843,16 +869,13 @@ schar_T utfc_ptr2schar(const char *p, int *firstc) return schar_from_buf_first(p, len, first_compose); } -/// Get the screen char at the beginning of a string with length +/// Get the screen char from a char with a known length /// /// Like utfc_ptr2schar but use no more than p[maxlen]. -schar_T utfc_ptr2schar_len(const char *p, int maxlen, int *firstc) +schar_T utfc_ptrlen2schar(const char *p, int len, int *firstc) FUNC_ATTR_NONNULL_ALL { - assert(maxlen > 0); - - size_t len = (size_t)utf_ptr2len_len(p, maxlen); - if (len > (size_t)maxlen || (len == 1 && (uint8_t)(*p) >= 0x80) || len == 0) { + if ((len == 1 && (uint8_t)(*p) >= 0x80) || len == 0) { // invalid or truncated sequence *firstc = (uint8_t)(*p); return 0; @@ -860,11 +883,13 @@ schar_T utfc_ptr2schar_len(const char *p, int maxlen, int *firstc) int c = utf_ptr2char(p); *firstc = c; - bool first_compose = utf_iscomposing(c); - maxlen = MIN(maxlen, MAX_SCHAR_SIZE - 1 - first_compose); - len = (size_t)utfc_ptr2len_len(p, maxlen); + bool first_compose = utf_iscomposing_first(c); + int maxlen = MAX_SCHAR_SIZE - 1 - first_compose; + if (len > maxlen) { + len = utfc_ptr2len_len(p, maxlen); + } - return schar_from_buf_first(p, len, first_compose); + return schar_from_buf_first(p, (size_t)len, first_compose); } /// Caller must ensure there is space for `first_compose` @@ -962,8 +987,9 @@ int utfc_ptr2len(const char *const p) // Check for composing characters. int prevlen = 0; + GraphemeState state = GRAPHEME_STATE_INIT; while (true) { - if ((uint8_t)p[len] < 0x80 || !utf_composinglike(p + prevlen, p + len)) { + if ((uint8_t)p[len] < 0x80 || !utf_composinglike(p + prevlen, p + len, &state)) { return len; } @@ -994,9 +1020,10 @@ int utfc_ptr2len_len(const char *p, int size) return 1; } - // Check for composing characters. We can handle only the first six, but + // Check for composing characters. We can only display a limited amount, but // skip all of them (otherwise the cursor would get stuck). int prevlen = 0; + GraphemeState state = GRAPHEME_STATE_INIT; while (len < size) { if ((uint8_t)p[len] < 0x80) { break; @@ -1009,7 +1036,7 @@ int utfc_ptr2len_len(const char *p, int size) break; } - if (!utf_composinglike(p + prevlen, p + len)) { + if (!utf_composinglike(p + prevlen, p + len, &state)) { break; } @@ -1082,13 +1109,21 @@ int utf_char2bytes(const int c, char *const buf) } } -/// Return true if "c" is a composing UTF-8 character. -/// This means it will be drawn on top of the preceding character. +/// Return true if "c" is a legacy composing UTF-8 character. +/// +/// This is deprecated in favour of utf_composinglike() which uses the modern +/// stateful algorithm to determine grapheme clusters. Still available +/// to support some legacy code which hasn't been refactored yet. +/// +/// To check if a char would combine with a preceeding space, use +/// utf_iscomposing_first() instead. +/// /// Based on code from Markus Kuhn. /// Returns false for negative values. -bool utf_iscomposing(int c) +bool utf_iscomposing_legacy(int c) { - return intable(combining, ARRAY_SIZE(combining), c); + const utf8proc_property_t *prop = utf8proc_get_property(c); + return prop->category == UTF8PROC_CATEGORY_MN || prop->category == UTF8PROC_CATEGORY_ME; } #ifdef __SSE2__ @@ -1133,6 +1168,33 @@ bool utf_printable(int c) #else +// Return true if "c" is in "table". +static bool intable(const struct interval *table, size_t n_items, int c) + FUNC_ATTR_PURE +{ + assert(n_items > 0); + // first quick check for Latin1 etc. characters + if (c < table[0].first) { + return false; + } + + assert(n_items <= SIZE_MAX / 2); + // binary search in table + size_t bot = 0; + size_t top = n_items; + do { + size_t mid = (bot + top) >> 1; + if (table[mid].last < c) { + bot = mid + 1; + } else if (table[mid].first > c) { + top = mid; + } else { + return true; + } + } while (top > bot); + return false; +} + // Return true for characters that can be displayed in a normal way. // Only for characters of 0x100 and above! bool utf_printable(int c) @@ -1255,8 +1317,9 @@ int utf_class_tab(const int c, const uint64_t *const chartab) return 1; // punctuation } + const utf8proc_property_t *prop = utf8proc_get_property(c); // emoji - if (intable(emoji_all, ARRAY_SIZE(emoji_all), c)) { + if (prop_is_emojilike(prop)) { return 3; } @@ -1276,47 +1339,51 @@ int utf_class_tab(const int c, const uint64_t *const chartab) return 2; } -bool utf_ambiguous_width(int c) +bool utf_ambiguous_width(const char *p) { - return c >= 0x80 && (intable(ambiguous, ARRAY_SIZE(ambiguous), c) - || intable(emoji_all, ARRAY_SIZE(emoji_all), c)); -} + // be quick if there is nothing to print or ASCII-only + if (p[0] == NUL || p[1] == NUL) { + return false; + } -// Generic conversion function for case operations. -// Return the converted equivalent of "a", which is a UCS-4 character. Use -// the given conversion "table". Uses binary search on "table". -static int utf_convert(int a, const convertStruct *const table, size_t n_items) -{ - // indices into table - size_t start = 0; - size_t end = n_items; - while (start < end) { - // need to search further - size_t mid = (end + start) / 2; - if (table[mid].rangeEnd < a) { - start = mid + 1; - } else { - end = mid; + CharInfo info = utf_ptr2CharInfo(p); + if (info.value >= 0x80) { + const utf8proc_property_t *prop = utf8proc_get_property(info.value); + if (prop->ambiguous_width || prop_is_emojilike(prop)) { + return true; } } - if (start < n_items - && table[start].rangeStart <= a - && a <= table[start].rangeEnd - && (a - table[start].rangeStart) % table[start].step == 0) { - return a + table[start].offset; - } - return a; + + // check if second sequence is 0xFE0F VS-16 which can turn things into emoji, + // safe with NUL (no second sequence) + return memcmp(p + info.len, "\xef\xb8\x8f", 3) == 0; } // Return the folded-case equivalent of "a", which is a UCS-4 character. Uses -// simple case folding. +// full case folding. int utf_fold(int a) { if (a < 0x80) { // be fast for ASCII return a >= 0x41 && a <= 0x5a ? a + 32 : a; } - return utf_convert(a, foldCase, ARRAY_SIZE(foldCase)); + + // TODO(dundargoc): utf8proc only does full case folding, which breaks some tests. This is a + // temporary workaround to circumvent failing tests. + // + // (0xdf) ß == ss in full casefolding. Using this however breaks the vim spell tests and the error + // E763 is thrown. This is due to the test spells relying on the vim spell files. + // + // (0x130) İ == i̇ in full casefolding. + if (a == 0xdf || a == 0x130) { + return a; + } + + utf8proc_int32_t result[1]; + + utf8proc_ssize_t res = utf8proc_decompose_char(a, result, 1, UTF8PROC_CASEFOLD, NULL); + + return (res == 1) ? result[0] : a; } // Vim's own character class functions. These exist because many library @@ -1324,9 +1391,6 @@ int utf_fold(int a) // invalid values or can't handle latin1 when the locale is C. // Speed is most important here. -// Note: UnicodeData.txt does not define U+1E9E as being the corresponding upper -// case letter for U+00DF (ß), however it is part of the toLower table - /// Return the upper-case equivalent of "a", which is a UCS-4 character. Use /// simple case folding. int mb_toupper(int a) @@ -1345,14 +1409,12 @@ int mb_toupper(int a) return TOUPPER_LOC(a); } - // For any other characters use the above mapping table. - return utf_convert(a, toUpper, ARRAY_SIZE(toUpper)); + return utf8proc_toupper(a); } bool mb_islower(int a) { - // German sharp s is lower case but has no upper case equivalent. - return (mb_toupper(a) != a) || a == 0xdf; + return mb_toupper(a) != a; } /// Return the lower-case equivalent of "a", which is a UCS-4 character. Use @@ -1373,8 +1435,7 @@ int mb_tolower(int a) return TOLOWER_LOC(a); } - // For any other characters use the above mapping table. - return utf_convert(a, toLower, ARRAY_SIZE(toLower)); + return utf8proc_tolower(a); } bool mb_isupper(int a) @@ -1388,7 +1449,7 @@ bool mb_isalpha(int a) return mb_islower(a) || mb_isupper(a); } -static int utf_strnicmp(const char *s1, const char *s2, size_t n1, size_t n2) +int utf_strnicmp(const char *s1, const char *s2, size_t n1, size_t n2) { int c1, c2; char buffer[6]; @@ -1545,7 +1606,7 @@ int utf16_to_utf8(const wchar_t *utf16, int utf16len, char **utf8) return uv_translate_sys_error(GetLastError()); } - (*utf8)[bufsize] = '\0'; + (*utf8)[bufsize] = NUL; return 0; } @@ -1673,6 +1734,26 @@ void show_utf8(void) msg(IObuff, 0); } +/// @return true if boundclass bc always starts a new cluster regardless of what's before +/// false negatives are allowed (perf cost, not correctness) +static bool always_break(int bc) +{ + return (bc == UTF8PROC_BOUNDCLASS_CONTROL); +} + +/// @return true if bc2 always starts a cluster after bc1 +/// false negatives are allowed (perf cost, not correctness) +static bool always_break_two(int bc1, int bc2) +{ + // don't check for UTF8PROC_BOUNDCLASS_CONTROL for bc2 as it either has been checked by + // "always_break" on first iteration or when it was bc1 in the previous iteration + return ((bc1 != UTF8PROC_BOUNDCLASS_PREPEND && bc2 == UTF8PROC_BOUNDCLASS_OTHER) + || (bc1 >= UTF8PROC_BOUNDCLASS_CR && bc1 <= UTF8PROC_BOUNDCLASS_CONTROL) + || (bc2 == UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC + && (bc1 == UTF8PROC_BOUNDCLASS_OTHER + || bc1 == UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC))); +} + /// Return offset from "p" to the start of a character, including composing characters. /// "base" must be the start of the string, which must be NUL terminated. /// If "p" points to the NUL at the end of the string return 0. @@ -1686,50 +1767,111 @@ int utf_head_off(const char *base_in, const char *p_in) const uint8_t *base = (uint8_t *)base_in; const uint8_t *p = (uint8_t *)p_in; - // Skip backwards over trailing bytes: 10xx.xxxx - // Skip backwards again if on a composing char. - const uint8_t *q; - for (q = p;; q--) { - // Move s to the last byte of this char. - const uint8_t *s; - for (s = q; (s[1] & 0xc0) == 0x80; s++) {} - - // Move q to the first byte of this char. - while (q > base && (*q & 0xc0) == 0x80) { - q--; - } - // Check for illegal sequence. Do allow an illegal byte after where we - // started. - int len = utf8len_tab[*q]; - if (len != (int)(s - q + 1) && len != (int)(p - q + 1)) { - return 0; + const uint8_t *start = p; + + // move start to the first byte of this codepoint + // might stop on a continuation byte if overlong, handled by utf_ptr2CharInfo_impl + while (start > base && (*start & 0xc0) == 0x80 && (p - start) < 6) { + start--; + } + + const uint8_t last_len = utf8len_tab[*start]; + int32_t cur_code = utf_ptr2CharInfo_impl(start, (uintptr_t)last_len); + if (cur_code < 0 || p - start >= last_len) { + return 0; // p must be part of an illegal sequence + } + const uint8_t * const safe_end = start + last_len; + + int cur_bc = utf8proc_get_property(cur_code)->boundclass; + if (always_break(cur_bc) || start == base) { + return (int)(p - start); + } + + // backtrack to find the start of a cluster. we might go too far, checked in the next loop + const uint8_t *cur_pos = start; + const uint8_t *const p_start = start; + + while (true) { + if (start[-1] == NUL) { + break; } - if (q <= base) { + start--; + if (*start < 0x80) { // stop on ascii, we are done break; } - int c = utf_ptr2char((char *)q); - if (utf_iscomposing(c)) { - continue; + while (start > base && (*start & 0xc0) == 0x80 && (cur_pos - start) < 6) { + start--; } - if (arabic_maycombine(c)) { - // Advance to get a sneak-peak at the next char - const uint8_t *j = q; - j--; - // Move j to the first byte of this char. - while (j > base && (*j & 0xc0) == 0x80) { - j--; - } - if (arabic_combine(utf_ptr2char((char *)j), c)) { - continue; - } + int prev_len = utf8len_tab[*start]; + int32_t prev_code = utf_ptr2CharInfo_impl(start, (uintptr_t)prev_len); + if (prev_code < 0 || prev_len < cur_pos - start) { + start = cur_pos; // start at valid sequence after invalid bytes + break; } - break; + + int prev_bc = utf8proc_get_property(prev_code)->boundclass; + if (always_break_two(prev_bc, cur_bc) && !arabic_combine(prev_code, cur_code)) { + start = cur_pos; // prev_code cannot be a part of this cluster + break; + } else if (start == base) { + break; + } + cur_pos = start; + cur_bc = prev_bc; + cur_code = prev_code; + } + + // hot path: we are already on the first codepoint of a sequence + if (start == p_start && last_len > p - start) { + return (int)(p - start); } - return (int)(p - q); + const uint8_t *q = start; + while (q < p) { + // don't need to find end of cluster. once we reached the codepoint of p, we are done + int len = utfc_ptr2len_len((const char *)q, (int)(safe_end - q)); + + if (q + len > p) { + return (int)(p - q); + } + + q += len; + } + + return 0; +} + +/// Assumes caller already handles ascii. see `utfc_next` +StrCharInfo utfc_next_impl(StrCharInfo cur) +{ + int32_t prev_code = cur.chr.value; + uint8_t *next = (uint8_t *)(cur.ptr + cur.chr.len); + GraphemeState state = GRAPHEME_STATE_INIT; + assert(*next >= 0x80); + + while (true) { + uint8_t const next_len = utf8len_tab[*next]; + int32_t const next_code = utf_ptr2CharInfo_impl(next, (uintptr_t)next_len); + if (utf8proc_grapheme_break_stateful(prev_code, next_code, &state) + && !arabic_combine(prev_code, next_code)) { + return (StrCharInfo){ + .ptr = (char *)next, + .chr = (CharInfo){ .value = next_code, .len = (next_code < 0 ? 1 : next_len) }, + }; + } + + prev_code = next_code; + next += next_len; + if (EXPECT(*next < 0x80U, true)) { + return (StrCharInfo){ + .ptr = (char *)next, + .chr = (CharInfo){ .value = *next, .len = 1 }, + }; + } + } } // Whether space is NOT allowed before/after 'c'. @@ -2688,7 +2830,7 @@ char *string_convert_ext(const vimconv_T *const vcp, char *ptr, size_t *lenp, si c = 0x100; break; // not in latin9 } } - if (!utf_iscomposing(c)) { // skip composing chars + if (!utf_iscomposing_legacy(c)) { // skip composing chars if (c < 0x100) { *d++ = (uint8_t)c; } else if (vcp->vc_fail) { @@ -2776,17 +2918,17 @@ void f_setcellwidths(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) emsg(_(e_listreq)); return; } + const list_T *const l = argvars[0].vval.v_list; - if (tv_list_len(l) == 0) { + cw_interval_T *table = NULL; + const size_t table_size = (size_t)tv_list_len(l); + if (table_size == 0) { // Clearing the table. - xfree(cw_table); - cw_table = NULL; - cw_table_size = 0; - return; + goto update; } // Note: use list_T instead of listitem_T so that TV_LIST_ITEM_NEXT can be used properly below. - const list_T **ptrs = xmalloc(sizeof(const list_T *) * (size_t)tv_list_len(l)); + const list_T **ptrs = xmalloc(sizeof(const list_T *) * table_size); // Check that all entries are a list with three numbers, the range is // valid and the cell width is valid. @@ -2838,12 +2980,12 @@ void f_setcellwidths(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) }); // Sort the list on the first number. - qsort((void *)ptrs, (size_t)tv_list_len(l), sizeof(const list_T *), tv_nr_compare); + qsort((void *)ptrs, table_size, sizeof(const list_T *), tv_nr_compare); - cw_interval_T *table = xmalloc(sizeof(cw_interval_T) * (size_t)tv_list_len(l)); + table = xmalloc(sizeof(cw_interval_T) * table_size); // Store the items in the new table. - for (item = 0; item < tv_list_len(l); item++) { + for (item = 0; (size_t)item < table_size; item++) { const list_T *const li_l = ptrs[item]; const listitem_T *lili = tv_list_first(li_l); const varnumber_T n1 = TV_LIST_ITEM_TV(lili)->vval.v_number; @@ -2862,10 +3004,12 @@ void f_setcellwidths(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) xfree((void *)ptrs); +update: + ; cw_interval_T *const cw_table_save = cw_table; const size_t cw_table_size_save = cw_table_size; cw_table = table; - cw_table_size = (size_t)tv_list_len(l); + cw_table_size = table_size; // Check that the new value does not conflict with 'listchars' or // 'fillchars'. |