diff options
Diffstat (limited to 'src/nvim/charset.c')
-rw-r--r-- | src/nvim/charset.c | 206 |
1 files changed, 93 insertions, 113 deletions
diff --git a/src/nvim/charset.c b/src/nvim/charset.c index 9e240fd38b..577fc13a31 100644 --- a/src/nvim/charset.c +++ b/src/nvim/charset.c @@ -1,3 +1,6 @@ +// This is an open source non-commercial project. Dear PVS-Studio, please check +// it. PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com + /// @file charset.c /// /// Code related to character sets. @@ -15,6 +18,7 @@ #include "nvim/func_attr.h" #include "nvim/indent.h" #include "nvim/main.h" +#include "nvim/mark.h" #include "nvim/mbyte.h" #include "nvim/memline.h" #include "nvim/memory.h" @@ -41,8 +45,10 @@ static bool chartab_initialized = false; (buf)->b_chartab[(unsigned)(c) >> 6] |= (1ull << ((c) & 0x3f)) #define RESET_CHARTAB(buf, c) \ (buf)->b_chartab[(unsigned)(c) >> 6] &= ~(1ull << ((c) & 0x3f)) +#define GET_CHARTAB_TAB(chartab, c) \ + ((chartab)[(unsigned)(c) >> 6] & (1ull << ((c) & 0x3f))) #define GET_CHARTAB(buf, c) \ - ((buf)->b_chartab[(unsigned)(c) >> 6] & (1ull << ((c) & 0x3f))) + GET_CHARTAB_TAB((buf)->b_chartab, c) // Table used below, see init_chartab() for an explanation static char_u g_chartab[256]; @@ -88,7 +94,6 @@ int buf_init_chartab(buf_T *buf, int global) { int c; int c2; - char_u *p; int i; bool tilde; bool do_isalpha; @@ -142,7 +147,8 @@ int buf_init_chartab(buf_T *buf, int global) // Walk through the 'isident', 'iskeyword', 'isfname' and 'isprint' // options Each option is a list of characters, character numbers or // ranges, separated by commas, e.g.: "200-210,x,#-178,-" - for (i = global ? 0 : 3; i <= 3; ++i) { + for (i = global ? 0 : 3; i <= 3; i++) { + const char_u *p; if (i == 0) { // first round: 'isident' p = p_isi; @@ -167,7 +173,7 @@ int buf_init_chartab(buf_T *buf, int global) } if (ascii_isdigit(*p)) { - c = getdigits_int(&p); + c = getdigits_int((char_u **)&p); } else { c = mb_ptr2char_adv(&p); } @@ -177,7 +183,7 @@ int buf_init_chartab(buf_T *buf, int global) ++p; if (ascii_isdigit(*p)) { - c2 = getdigits_int(&p); + c2 = getdigits_int((char_u **)&p); } else { c2 = mb_ptr2char_adv(&p); } @@ -210,8 +216,8 @@ int buf_init_chartab(buf_T *buf, int global) // work properly when 'encoding' is "latin1" and the locale is // "C". if (!do_isalpha - || vim_islower(c) - || vim_isupper(c) + || mb_islower(c) + || mb_isupper(c) || (p_altkeymap && (F_isalpha(c) || F_isdigit(c)))) { if (i == 0) { // (re)set ID flag @@ -415,11 +421,11 @@ char_u* str_foldcase(char_u *str, int orglen, char_u *buf, int buflen) while (STR_CHAR(i) != NUL) { int c = utf_ptr2char(STR_PTR(i)); int olen = utf_ptr2len(STR_PTR(i)); - int lc = utf_tolower(c); + int lc = mb_tolower(c); // Only replace the character when it is not an invalid // sequence (ASCII character or more than one byte) and - // utf_tolower() doesn't return the original character. + // mb_tolower() doesn't return the original character. if (((c < 0x80) || (olen > 1)) && (c != lc)) { int nlen = utf_char2len(lc); @@ -539,18 +545,8 @@ void transchar_nonprint(char_u *buf, int c) buf[1] = (char_u)(c ^ 0x40); buf[2] = NUL; - } else if (c >= 0x80) { - transchar_hex(buf, c); - } else if ((c >= ' ' + 0x80) && (c <= '~' + 0x80)) { - // 0xa0 - 0xfe - buf[0] = '|'; - buf[1] = (char_u)(c - 0x80); - buf[2] = NUL; } else { - // 0x80 - 0x9f and 0xff - buf[0] = '~'; - buf[1] = (char_u)((c - 0x80) ^ 0x40); - buf[2] = NUL; + transchar_hex(buf, c); } } @@ -634,7 +630,7 @@ int char2cells(int c) /// @param p /// /// @return number of display cells. -int ptr2cells(char_u *p) +int ptr2cells(const char_u *p) { // For UTF-8 we need to look at more bytes if the first byte is >= 0x80. if (*p >= 0x80) { @@ -766,7 +762,7 @@ bool vim_isIDc(int c) } /// Check that "c" is a keyword character: -/// Letters and characters from 'iskeyword' option for current buffer. +/// Letters and characters from 'iskeyword' option for the current buffer. /// For multi-byte characters mb_get_class() is used (builtin rules). /// /// @param c character to check @@ -776,6 +772,20 @@ bool vim_iswordc(int c) return vim_iswordc_buf(c, curbuf); } +/// Check that "c" is a keyword character +/// Letters and characters from 'iskeyword' option for given buffer. +/// For multi-byte characters mb_get_class() is used (builtin rules). +/// +/// @param[in] c Character to check. +/// @param[in] chartab Buffer chartab. +bool vim_iswordc_tab(const int c, const uint64_t *const chartab) + FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL +{ + return (c >= 0x100 + ? (utf_class(c) >= 2) + : (c > 0 && GET_CHARTAB_TAB(chartab, c) != 0)); +} + /// Check that "c" is a keyword character: /// Letters and characters from 'iskeyword' option for given buffer. /// For multi-byte characters mb_get_class() is used (builtin rules). @@ -785,10 +795,7 @@ bool vim_iswordc(int c) bool vim_iswordc_buf(int c, buf_T *buf) FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ARG(2) { - if (c >= 0x100) { - return utf_class(c) >= 2; - } - return c > 0 && c < 0x100 && GET_CHARTAB(buf, c) != 0; + return vim_iswordc_tab(c, buf->b_chartab); } /// Just like vim_iswordc() but uses a pointer to the (multi-byte) character. @@ -974,10 +981,8 @@ int win_lbr_chartabsize(win_T *wp, char_u *line, char_u *s, colnr_T col, int *he mb_ptr_adv(s); c = *s; - if (!((c != NUL) - && (vim_isbreak(c) - || (!vim_isbreak(c) - && ((col2 == col) || !vim_isbreak(*ps)))))) { + if (!(c != NUL + && (vim_isbreak(c) || col2 == col || !vim_isbreak(*ps)))) { break; } @@ -1160,7 +1165,13 @@ void getvcol(win_T *wp, pos_T *pos, colnr_T *start, colnr_T *cursor, // continue until the NUL posptr = NULL; } else { + // Special check for an empty line, which can happen on exit, when + // ml_get_buf() always returns an empty string. + if (*ptr == NUL) { + pos->col = 0; + } posptr = ptr + pos->col; + posptr -= utf_head_off(line, posptr); } // This function is used very often, do some speed optimizations. @@ -1347,7 +1358,7 @@ void getvcols(win_T *wp, pos_T *pos1, pos_T *pos2, colnr_T *left, colnr_T to1; colnr_T to2; - if (ltp(pos1, pos2)) { + if (lt(*pos1, *pos2)) { getvvcol(wp, pos1, &from1, NULL, &to1); getvvcol(wp, pos2, &from2, NULL, &to2); } else { @@ -1378,7 +1389,8 @@ void getvcols(win_T *wp, pos_T *pos1, pos_T *pos2, colnr_T *left, /// /// @return Pointer to character after the skipped whitespace. char_u *skipwhite(const char_u *q) - FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT + FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL + FUNC_ATTR_NONNULL_RET { const char_u *p = q; while (ascii_iswhite(*p)) { @@ -1387,19 +1399,21 @@ char_u *skipwhite(const char_u *q) return (char_u *)p; } -/// skip over digits +/// Skip over digits /// -/// @param q +/// @param[in] q String to skip digits in. /// /// @return Pointer to the character after the skipped digits. -char_u* skipdigits(char_u *q) +char_u *skipdigits(const char_u *q) + FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL + FUNC_ATTR_NONNULL_RET { - char_u *p = q; + const char_u *p = q; while (ascii_isdigit(*p)) { // skip to next non-digit p++; } - return p; + return (char_u *)p; } /// skip over binary digits @@ -1484,78 +1498,17 @@ char_u* skiptohex(char_u *q) return p; } -// Vim's own character class functions. These exist because many library -// islower()/toupper() etc. do not work properly: they crash when used with -// invalid values or can't handle latin1 when the locale is C. -// Speed is most important here. - -/// Check that the character is lower-case +/// Skip over text until ' ' or '\t' or NUL /// -/// @param c character to check -bool vim_islower(int c) - FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT -{ - if (c <= '@') { - return false; - } - - if (c >= 0x80) { - return utf_islower(c); - } - return islower(c); -} - -/// Check that the character is upper-case -/// -/// @param c character to check -bool vim_isupper(int c) - FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT -{ - if (c <= '@') { - return false; - } - - if (c >= 0x80) { - return utf_isupper(c); - } - return isupper(c); -} - -int vim_toupper(int c) -{ - if (c <= '@') { - return c; - } - - if (c >= 0x80) { - return utf_toupper(c); - } - return TOUPPER_LOC(c); -} - -int vim_tolower(int c) -{ - if (c <= '@') { - return c; - } - - if (c >= 0x80) { - return utf_tolower(c); - } - return TOLOWER_LOC(c); -} - -/// skiptowhite: skip over text until ' ' or '\t' or NUL. -/// -/// @param p +/// @param[in] p Text to skip over. /// /// @return Pointer to the next whitespace or NUL character. -char_u* skiptowhite(char_u *p) +char_u *skiptowhite(const char_u *p) { while (*p != ' ' && *p != '\t' && *p != NUL) { p++; } - return p; + return (char_u *)p; } /// skiptowhite_esc: Like skiptowhite(), but also skip escaped chars @@ -1666,13 +1619,13 @@ bool vim_isblankline(char_u *lbuf) /// @param unptr Returns the unsigned result. /// @param maxlen Max length of string to check. void vim_str2nr(const char_u *const start, int *const prep, int *const len, - const int what, long *const nptr, unsigned long *const unptr, - const int maxlen) + const int what, varnumber_T *const nptr, + uvarnumber_T *const unptr, const int maxlen) { const char_u *ptr = start; int pre = 0; // default is decimal bool negative = false; - unsigned long un = 0; + uvarnumber_T un = 0; if (ptr[0] == '-') { negative = true; @@ -1728,7 +1681,12 @@ void vim_str2nr(const char_u *const start, int *const prep, int *const len, n += 2; // skip over "0b" } while ('0' <= *ptr && *ptr <= '1') { - un = 2 * un + (unsigned long)(*ptr - '0'); + // avoid ubsan error for overflow + if (un < UVARNUMBER_MAX / 2) { + un = 2 * un + (uvarnumber_T)(*ptr - '0'); + } else { + un = UVARNUMBER_MAX; + } ptr++; if (n++ == maxlen) { break; @@ -1737,7 +1695,12 @@ void vim_str2nr(const char_u *const start, int *const prep, int *const len, } else if ((pre == '0') || what == STR2NR_OCT + STR2NR_FORCE) { // octal while ('0' <= *ptr && *ptr <= '7') { - un = 8 * un + (unsigned long)(*ptr - '0'); + // avoid ubsan error for overflow + if (un < UVARNUMBER_MAX / 8) { + un = 8 * un + (uvarnumber_T)(*ptr - '0'); + } else { + un = UVARNUMBER_MAX; + } ptr++; if (n++ == maxlen) { break; @@ -1750,7 +1713,12 @@ void vim_str2nr(const char_u *const start, int *const prep, int *const len, n += 2; // skip over "0x" } while (ascii_isxdigit(*ptr)) { - un = 16 * un + (unsigned long)hex2nr(*ptr); + // avoid ubsan error for overflow + if (un < UVARNUMBER_MAX / 16) { + un = 16 * un + (uvarnumber_T)hex2nr(*ptr); + } else { + un = UVARNUMBER_MAX; + } ptr++; if (n++ == maxlen) { break; @@ -1759,7 +1727,12 @@ void vim_str2nr(const char_u *const start, int *const prep, int *const len, } else { // decimal while (ascii_isdigit(*ptr)) { - un = 10 * un + (unsigned long)(*ptr - '0'); + // avoid ubsan error for overflow + if (un < UVARNUMBER_MAX / 10) { + un = 10 * un + (uvarnumber_T)(*ptr - '0'); + } else { + un = UVARNUMBER_MAX; + } ptr++; if (n++ == maxlen) { break; @@ -1776,11 +1749,18 @@ void vim_str2nr(const char_u *const start, int *const prep, int *const len, } if (nptr != NULL) { - if (negative) { - // account for leading '-' for decimal numbers - *nptr = -(long)un; + if (negative) { // account for leading '-' for decimal numbers + // avoid ubsan error for overflow + if (un > VARNUMBER_MAX) { + *nptr = VARNUMBER_MIN; + } else { + *nptr = -(varnumber_T)un; + } } else { - *nptr = (long)un; + if (un > VARNUMBER_MAX) { + un = VARNUMBER_MAX; + } + *nptr = (varnumber_T)un; } } |