diff options
Diffstat (limited to 'src/nvim/charset.c')
-rw-r--r-- | src/nvim/charset.c | 261 |
1 files changed, 155 insertions, 106 deletions
diff --git a/src/nvim/charset.c b/src/nvim/charset.c index 980b4ed426..7d5f80c531 100644 --- a/src/nvim/charset.c +++ b/src/nvim/charset.c @@ -313,69 +313,112 @@ void trans_characters(char_u *buf, int bufsize) } } -/// Translate a string into allocated memory, replacing special chars with -/// printable chars. +/// Find length of a string capable of holding s with all specials replaced /// -/// @param s +/// Assumes replacing special characters with printable ones just like +/// strtrans() does. +/// +/// @param[in] s String to check. /// -/// @return translated string -char_u *transstr(char_u *s) FUNC_ATTR_NONNULL_RET +/// @return number of bytes needed to hold a translation of `s`, NUL byte not +/// included. +size_t transstr_len(const char *const s) + FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_PURE { - char_u *res; - char_u *p; - int c; - size_t l; - char_u hexbuf[11]; - - // Compute the length of the result, taking account of unprintable - // multi-byte characters. + const char *p = s; size_t len = 0; - p = s; - while (*p != NUL) { - if ((l = (size_t)(*mb_ptr2len)(p)) > 1) { - c = (*mb_ptr2char)(p); - p += l; + while (*p) { + const size_t l = (size_t)utfc_ptr2len((const char_u *)p); + if (l > 1) { + int pcc[MAX_MCO + 2]; + pcc[0] = utfc_ptr2char((const char_u *)p, &pcc[1]); - if (vim_isprintc(c)) { + if (vim_isprintc(pcc[0])) { len += l; } else { - transchar_hex(hexbuf, c); - len += STRLEN(hexbuf); + for (size_t i = 0; i < ARRAY_SIZE(pcc); i++) { + char hexbuf[11]; + len += transchar_hex(hexbuf, pcc[i]); + } } + p += l; } else { - l = (size_t)byte2cells(*p++); - - if (l > 0) { - len += l; - } else { - // illegal byte sequence - len += 4; - } + const int b2c_l = byte2cells((uint8_t)(*p++)); + // Illegal byte sequence may occupy up to 4 characters. + len += (size_t)(b2c_l > 0 ? b2c_l : 4); } } - res = xmallocz(len); - - *res = NUL; - p = s; + return len; +} - while (*p != NUL) { - if ((l = (size_t)(*mb_ptr2len)(p)) > 1) { - c = (*mb_ptr2char)(p); +/// Replace special characters with printable ones +/// +/// @param[in] s String to replace characters from. +/// @param[out] buf Buffer to which result should be saved. +/// @param[in] len Buffer length. Resulting string may not occupy more then +/// len - 1 bytes (one for trailing NUL byte). +/// +/// @return length of the resulting string, without the NUL byte. +size_t transstr_buf(const char *const s, char *const buf, const size_t len) + FUNC_ATTR_NONNULL_ALL +{ + const char *p = s; + char *buf_p = buf; + char *const buf_e = buf_p + len - 1; + + while (*p != NUL && buf_p < buf_e) { + const size_t l = (size_t)utfc_ptr2len((const char_u *)p); + if (l > 1) { + if (buf_p + l >= buf_e) { + break; + } + int pcc[MAX_MCO + 2]; + pcc[0] = utfc_ptr2char((const char_u *)p, &pcc[1]); - if (vim_isprintc(c)) { - // append printable multi-byte char - STRNCAT(res, p, l); + if (vim_isprintc(pcc[0])) { + memmove(buf_p, p, l); + buf_p += l; } else { - transchar_hex(res + STRLEN(res), c); + for (size_t i = 0; i < ARRAY_SIZE(pcc); i++) { + char hexbuf[11]; + const size_t hexlen = transchar_hex(hexbuf, pcc[i]); + if (buf_p + hexlen >= buf_e) { + break; + } + memmove(buf_p, hexbuf, hexlen); + buf_p += hexlen; + } } p += l; } else { - STRCAT(res, transchar_byte(*p++)); + const char *const tb = (const char *)transchar_byte((uint8_t)(*p++)); + const size_t tb_len = strlen(tb); + memmove(buf_p, tb, tb_len); + buf_p += tb_len; } } + *buf_p = NUL; + assert(buf_p <= buf_e); + return (size_t)(buf_p - buf); +} - return res; +/// Copy string and replace special characters with printable characters +/// +/// Works like `strtrans()` does, used for that and in some other places. +/// +/// @param[in] s String to replace characters from. +/// +/// @return [allocated] translated string +char *transstr(const char *const s) + FUNC_ATTR_NONNULL_RET +{ + // Compute the length of the result, taking account of unprintable + // multi-byte characters. + const size_t len = transstr_len((const char *)s) + 1; + char *const buf = xmalloc(len); + transstr_buf(s, buf, len); + return buf; } /// Convert the string "str[orglen]" to do ignore-case comparing. @@ -474,14 +517,16 @@ char_u* str_foldcase(char_u *str, int orglen, char_u *buf, int buflen) // Does NOT work for multi-byte characters, c must be <= 255. // Also doesn't work for the first byte of a multi-byte, "c" must be a // character! -static char_u transchar_buf[7]; +static char_u transchar_buf[11]; -/// Translates a character +/// Translate a character into a printable one, leaving printable ASCII intact /// -/// @param c +/// All unicode characters are considered non-printable in this function. /// -/// @return translated character. -char_u* transchar(int c) +/// @param[in] c Character to translate. +/// +/// @return translated character into a static buffer. +char_u *transchar(int c) { int i = 0; if (IS_SPECIAL(c)) { @@ -494,23 +539,27 @@ char_u* transchar(int c) if ((!chartab_initialized && (((c >= ' ') && (c <= '~')) || (p_altkeymap && F_ischar(c)))) - || ((c < 256) && vim_isprintc_strict(c))) { + || ((c <= 0xFF) && vim_isprintc_strict(c))) { // printable character transchar_buf[i] = (char_u)c; transchar_buf[i + 1] = NUL; - } else { + } else if (c <= 0xFF) { transchar_nonprint(transchar_buf + i, c); + } else { + transchar_hex((char *)transchar_buf + i, c); } return transchar_buf; } -/// Like transchar(), but called with a byte instead of a character. Checks -/// for an illegal UTF-8 byte. +/// Like transchar(), but called with a byte instead of a character /// -/// @param c +/// Checks for an illegal UTF-8 byte. +/// +/// @param[in] c Byte to translate. /// /// @return pointer to translated character in transchar_buf. -char_u* transchar_byte(int c) +char_u *transchar_byte(const int c) + FUNC_ATTR_WARN_UNUSED_RESULT { if (c >= 0x80) { transchar_nonprint(transchar_buf, c); @@ -519,12 +568,14 @@ char_u* transchar_byte(int c) return transchar(c); } -/// Convert non-printable character to two or more printable characters in -/// "buf[]". "buf" needs to be able to hold five bytes. -/// Does NOT work for multi-byte characters, c must be <= 255. +/// Convert non-printable characters to 2..4 printable ones /// -/// @param buf -/// @param c +/// @warning Does not work for multi-byte characters, c must be <= 255. +/// +/// @param[out] buf Buffer to store result in, must be able to hold at least +/// 5 bytes (conversion result + NUL). +/// @param[in] c Character to convert. NUL is assumed to be NL according to +/// `:h NL-used-for-NUL`. void transchar_nonprint(char_u *buf, int c) { if (c == NL) { @@ -534,54 +585,63 @@ void transchar_nonprint(char_u *buf, int c) // we use CR in place of NL in this case c = NL; } + assert(c <= 0xff); - if (dy_flags & DY_UHEX) { + if (dy_flags & DY_UHEX || c > 0x7f) { // 'display' has "uhex" - transchar_hex(buf, c); - } else if (c <= 0x7f) { + transchar_hex((char *)buf, c); + } else { // 0x00 - 0x1f and 0x7f buf[0] = '^'; // DEL displayed as ^? buf[1] = (char_u)(c ^ 0x40); buf[2] = NUL; - } else { - transchar_hex(buf, c); } } -/// Convert a non-printable character to hex. +/// Convert a non-printable character to hex C string like "<FFFF>" /// -/// @param buf -/// @param c -void transchar_hex(char_u *buf, int c) +/// @param[out] buf Buffer to store result in. +/// @param[in] c Character to convert. +/// +/// @return Number of bytes stored in buffer, excluding trailing NUL byte. +size_t transchar_hex(char *const buf, const int c) + FUNC_ATTR_NONNULL_ALL { - int i = 0; + size_t i = 0; - buf[0] = '<'; + buf[i++] = '<'; if (c > 255) { - buf[++i] = (char_u)nr2hex((unsigned)c >> 12); - buf[++i] = (char_u)nr2hex((unsigned)c >> 8); - } - buf[++i] = (char_u)(nr2hex((unsigned)c >> 4)); - buf[++i] = (char_u)(nr2hex((unsigned)c)); - buf[++i] = '>'; - buf[++i] = NUL; + if (c > 255 * 256) { + buf[i++] = (char)nr2hex((unsigned)c >> 20); + buf[i++] = (char)nr2hex((unsigned)c >> 16); + } + buf[i++] = (char)nr2hex((unsigned)c >> 12); + buf[i++] = (char)nr2hex((unsigned)c >> 8); + } + buf[i++] = (char)(nr2hex((unsigned)c >> 4)); + buf[i++] = (char)(nr2hex((unsigned)c)); + buf[i++] = '>'; + buf[i] = NUL; + return i; } -/// Convert the lower 4 bits of byte "c" to its hex character. +/// Convert the lower 4 bits of byte "c" to its hex character +/// /// Lower case letters are used to avoid the confusion of <F1> being 0xf1 or /// function key 1. /// -/// @param c +/// @param[in] n Number to convert. /// /// @return the hex character. -static unsigned nr2hex(unsigned c) +static inline unsigned nr2hex(unsigned n) + FUNC_ATTR_CONST FUNC_ATTR_WARN_UNUSED_RESULT { - if ((c & 0xf) <= 9) { - return (c & 0xf) + '0'; + if ((n & 0xf) <= 9) { + return (n & 0xf) + '0'; } - return (c & 0xf) - 10 + 'a'; + return (n & 0xf) - 10 + 'a'; } /// Return number of display cells occupied by byte "b". @@ -863,7 +923,7 @@ bool vim_isprintc(int c) if (c >= 0x100) { return utf_printable(c); } - return c >= 0x100 || (c > 0 && (g_chartab[c] & CT_PRINT_CHAR)); + return c > 0 && (g_chartab[c] & CT_PRINT_CHAR); } /// Strict version of vim_isprintc(c), don't return true if "c" is the head @@ -1671,7 +1731,7 @@ void vim_str2nr(const char_u *const start, int *const prep, int *const len, && !STRING_ENDED(ptr + 1) && ptr[0] == '0' && ptr[1] != '8' && ptr[1] != '9') { pre = ptr[1]; - // Detect hexadecimal: 0x or 0X follwed by hex digit + // Detect hexadecimal: 0x or 0X followed by hex digit. if ((what & STR2NR_HEX) && !STRING_ENDED(ptr + 2) && (pre == 'X' || pre == 'x') @@ -1679,7 +1739,7 @@ void vim_str2nr(const char_u *const start, int *const prep, int *const len, ptr += 2; goto vim_str2nr_hex; } - // Detect binary: 0b or 0B follwed by 0 or 1 + // Detect binary: 0b or 0B followed by 0 or 1. if ((what & STR2NR_BIN) && !STRING_ENDED(ptr + 2) && (pre == 'B' || pre == 'b') @@ -1687,7 +1747,7 @@ void vim_str2nr(const char_u *const start, int *const prep, int *const len, ptr += 2; goto vim_str2nr_bin; } - // Detect octal number: zero followed by octal digits without '8' or '9' + // Detect octal number: zero followed by octal digits without '8' or '9'. pre = 0; if (!(what & STR2NR_OCT) || !('0' <= ptr[1] && ptr[1] <= '7')) { @@ -1718,32 +1778,21 @@ void vim_str2nr(const char_u *const start, int *const prep, int *const len, ptr++; \ } \ } while (0) - switch (pre) { - case 'b': - case 'B': { vim_str2nr_bin: - PARSE_NUMBER(2, (*ptr == '0' || *ptr == '1'), (*ptr - '0')); - break; - } - case '0': { + PARSE_NUMBER(2, (*ptr == '0' || *ptr == '1'), (*ptr - '0')); + goto vim_str2nr_proceed; vim_str2nr_oct: - PARSE_NUMBER(8, ('0' <= *ptr && *ptr <= '7'), (*ptr - '0')); - break; - } - case 0: { + PARSE_NUMBER(8, ('0' <= *ptr && *ptr <= '7'), (*ptr - '0')); + goto vim_str2nr_proceed; vim_str2nr_dec: - PARSE_NUMBER(10, (ascii_isdigit(*ptr)), (*ptr - '0')); - break; - } - case 'x': - case 'X': { + PARSE_NUMBER(10, (ascii_isdigit(*ptr)), (*ptr - '0')); + goto vim_str2nr_proceed; vim_str2nr_hex: - PARSE_NUMBER(16, (ascii_isxdigit(*ptr)), (hex2nr(*ptr))); - break; - } - } + PARSE_NUMBER(16, (ascii_isxdigit(*ptr)), (hex2nr(*ptr))); + goto vim_str2nr_proceed; #undef PARSE_NUMBER +vim_str2nr_proceed: if (prep != NULL) { *prep = pre; } |