From 19044a15f9d41a7424e94fb3f0e257537e7afa5e Mon Sep 17 00:00:00 2001 From: ZyX Date: Thu, 6 Apr 2017 21:21:11 +0300 Subject: strings: Replace vim_strchr implementation with a saner one Removes dead code (enc_utf8, enc_dbcs and has_mbyte now have hardcoded values), relies on libc implementation being more optimized. Also where previously negative character just would never be found it is an assertion error now. Ref #1476 --- src/nvim/strings.c | 64 ++++++++++++++++-------------------------------------- 1 file changed, 19 insertions(+), 45 deletions(-) (limited to 'src') diff --git a/src/nvim/strings.c b/src/nvim/strings.c index 5dcffe00e0..c4bc9b204a 100644 --- a/src/nvim/strings.c +++ b/src/nvim/strings.c @@ -401,54 +401,28 @@ int vim_strnicmp(const char *s1, const char *s2, size_t len) } #endif -/* - * Version of strchr() and strrchr() that handle unsigned char strings - * with characters from 128 to 255 correctly. It also doesn't return a - * pointer to the NUL at the end of the string. - */ -char_u *vim_strchr(const char_u *string, int c) - FUNC_ATTR_NONNULL_ALL FUNC_ATTR_PURE +/// strchr() version which handles multibyte strings +/// +/// @param[in] string String to search in. +/// @param[in] c Character to search for. Must be a valid character. +/// +/// @return Pointer to the first byte of the found character in string or NULL +/// if it was not found. NUL character is never found, use `strlen()` +/// instead. +char_u *vim_strchr(const char_u *const string, const int c) + FUNC_ATTR_NONNULL_ALL FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT { - int b; - - const char_u *p = string; - if (enc_utf8 && c >= 0x80) { - while (*p != NUL) { - int l = (*mb_ptr2len)(p); - - // Avoid matching an illegal byte here. - if (l > 1 && utf_ptr2char(p) == c) { - return (char_u *) p; - } - p += l; - } + assert(c >= 0); + if (c == 0) { return NULL; + } else if (c < 0x80) { + return (char_u *)strchr((const char *)string, c); + } else { + char u8char[MB_MAXBYTES + 1]; + const int len = utf_char2bytes(c, (char_u *)u8char); + u8char[len] = NUL; + return (char_u *)strstr((const char *)string, u8char); } - if (enc_dbcs != 0 && c > 255) { - int n2 = c & 0xff; - - c = ((unsigned)c >> 8) & 0xff; - while ((b = *p) != NUL) { - if (b == c && p[1] == n2) - return (char_u *) p; - p += (*mb_ptr2len)(p); - } - return NULL; - } - if (has_mbyte) { - while ((b = *p) != NUL) { - if (b == c) - return (char_u *) p; - p += (*mb_ptr2len)(p); - } - return NULL; - } - while ((b = *p) != NUL) { - if (b == c) - return (char_u *) p; - ++p; - } - return NULL; } /* -- cgit From 171baaee93c8e257ef593b30c05405d03ac30c96 Mon Sep 17 00:00:00 2001 From: ZyX Date: Thu, 6 Apr 2017 21:31:37 +0300 Subject: strings: Remove vim_strbyte Ref #1476 --- src/nvim/ex_cmds.c | 13 ++++++------- src/nvim/getchar.c | 4 ++-- src/nvim/mbyte.c | 6 +++--- src/nvim/regexp.c | 42 +++++++++++++++--------------------------- src/nvim/regexp_nfa.c | 13 +++---------- src/nvim/strings.c | 18 ------------------ 6 files changed, 29 insertions(+), 67 deletions(-) (limited to 'src') diff --git a/src/nvim/ex_cmds.c b/src/nvim/ex_cmds.c index 9a847a4c0a..8485a1ca66 100644 --- a/src/nvim/ex_cmds.c +++ b/src/nvim/ex_cmds.c @@ -5087,14 +5087,13 @@ static void helptags_one(char_u *dir, char_u *ext, char_u *tagfname, } p1 = vim_strchr(IObuff, '*'); /* find first '*' */ while (p1 != NULL) { - /* Use vim_strbyte() instead of vim_strchr() so that when - * 'encoding' is dbcs it still works, don't find '*' in the - * second byte. */ - p2 = vim_strbyte(p1 + 1, '*'); /* find second '*' */ - if (p2 != NULL && p2 > p1 + 1) { /* skip "*" and "**" */ - for (s = p1 + 1; s < p2; ++s) - if (*s == ' ' || *s == '\t' || *s == '|') + p2 = (char_u *)strchr((const char *)p1 + 1, '*'); // Find second '*'. + if (p2 != NULL && p2 > p1 + 1) { // Skip "*" and "**". + for (s = p1 + 1; s < p2; s++) { + if (*s == ' ' || *s == '\t' || *s == '|') { break; + } + } /* * Only accept a *tag* when it consists of valid diff --git a/src/nvim/getchar.c b/src/nvim/getchar.c index b83681ad01..56493a300d 100644 --- a/src/nvim/getchar.c +++ b/src/nvim/getchar.c @@ -3588,8 +3588,8 @@ int check_abbr(int c, char_u *ptr, int col, int mincol) char_u *q = mp->m_keys; int match; - if (vim_strbyte(mp->m_keys, K_SPECIAL) != NULL) { - /* might have CSI escaped mp->m_keys */ + if (strchr((const char *)mp->m_keys, K_SPECIAL) != NULL) { + // Might have CSI escaped mp->m_keys. q = vim_strsave(mp->m_keys); vim_unescape_csi(q); qlen = (int)STRLEN(q); diff --git a/src/nvim/mbyte.c b/src/nvim/mbyte.c index d96848754c..c31fee44af 100644 --- a/src/nvim/mbyte.c +++ b/src/nvim/mbyte.c @@ -356,10 +356,10 @@ int bomb_size(void) */ void remove_bom(char_u *s) { - char_u *p = s; + char *p = (char *)s; - while ((p = vim_strbyte(p, 0xef)) != NULL) { - if (p[1] == 0xbb && p[2] == 0xbf) { + while ((p = strchr(p, 0xef)) != NULL) { + if ((uint8_t)p[1] == 0xbb && (uint8_t)p[2] == 0xbf) { STRMOVE(p, p + 3); } else { p++; diff --git a/src/nvim/regexp.c b/src/nvim/regexp.c index 9baa53d2a2..e9c9b491fd 100644 --- a/src/nvim/regexp.c +++ b/src/nvim/regexp.c @@ -3429,32 +3429,26 @@ static long bt_regexec_both(char_u *line, c = *prog->regmust; s = line + col; - /* - * This is used very often, esp. for ":global". Use three versions of - * the loop to avoid overhead of conditions. - */ - if (!ireg_ic - && !has_mbyte - ) - while ((s = vim_strbyte(s, c)) != NULL) { - if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0) - break; /* Found it. */ - ++s; - } - else if (!ireg_ic || (!enc_utf8 && mb_char2len(c) > 1)) + // This is used very often, esp. for ":global". Use two versions of + // the loop to avoid overhead of conditions. + if (!ireg_ic) { while ((s = vim_strchr(s, c)) != NULL) { - if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0) - break; /* Found it. */ + if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0) { + break; // Found it. + } mb_ptr_adv(s); } - else + } else { while ((s = cstrchr(s, c)) != NULL) { - if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0) - break; /* Found it. */ + if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0) { + break; // Found it. + } mb_ptr_adv(s); } - if (s == NULL) /* Not present. */ + } + if (s == NULL) { // Not present. goto theend; + } } regline = line; @@ -3484,14 +3478,8 @@ static long bt_regexec_both(char_u *line, /* Messy cases: unanchored match. */ while (!got_int) { if (prog->regstart != NUL) { - /* Skip until the char we know it must start with. - * Used often, do some work to avoid call overhead. */ - if (!ireg_ic - && !has_mbyte - ) - s = vim_strbyte(regline + col, prog->regstart); - else - s = cstrchr(regline + col, prog->regstart); + // Skip until the char we know it must start with. + s = cstrchr(regline + col, prog->regstart); if (s == NULL) { retval = 0; break; diff --git a/src/nvim/regexp_nfa.c b/src/nvim/regexp_nfa.c index 5b49ab38f0..a77978884e 100644 --- a/src/nvim/regexp_nfa.c +++ b/src/nvim/regexp_nfa.c @@ -4855,17 +4855,10 @@ static int failure_chance(nfa_state_T *state, int depth) */ static int skip_to_start(int c, colnr_T *colp) { - char_u *s; - - /* Used often, do some work to avoid call overhead. */ - if (!ireg_ic - && !has_mbyte - ) - s = vim_strbyte(regline + *colp, c); - else - s = cstrchr(regline + *colp, c); - if (s == NULL) + const char_u *const s = cstrchr(regline + *colp, c); + if (s == NULL) { return FAIL; + } *colp = (int)(s - regline); return OK; } diff --git a/src/nvim/strings.c b/src/nvim/strings.c index c4bc9b204a..ada4c108cf 100644 --- a/src/nvim/strings.c +++ b/src/nvim/strings.c @@ -425,24 +425,6 @@ char_u *vim_strchr(const char_u *const string, const int c) } } -/* - * Version of strchr() that only works for bytes and handles unsigned char - * strings with characters above 128 correctly. It also doesn't return a - * pointer to the NUL at the end of the string. - */ -char_u *vim_strbyte(const char_u *string, int c) - FUNC_ATTR_NONNULL_ALL FUNC_ATTR_PURE -{ - const char_u *p = string; - - while (*p != NUL) { - if (*p == c) - return (char_u *) p; - ++p; - } - return NULL; -} - /* * Search for last occurrence of "c" in "string". * Return NULL if not found. -- cgit From ac1cb1c72fa762b39ff457153c7fa6ecf1eaedc3 Mon Sep 17 00:00:00 2001 From: ZyX Date: Thu, 6 Apr 2017 21:56:49 +0300 Subject: regexp: Refactor cstrchr Ref #1476 --- src/nvim/regexp.c | 55 +++++++++++++++++++++++++------------------------------ 1 file changed, 25 insertions(+), 30 deletions(-) (limited to 'src') diff --git a/src/nvim/regexp.c b/src/nvim/regexp.c index e9c9b491fd..175aa1b970 100644 --- a/src/nvim/regexp.c +++ b/src/nvim/regexp.c @@ -6287,43 +6287,38 @@ static int cstrncmp(char_u *s1, char_u *s2, int *n) /* * cstrchr: This function is used a lot for simple searches, keep it fast! */ -static char_u *cstrchr(char_u *s, int c) +static inline char_u *cstrchr(const char_u *const s, const int c) + FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL + FUNC_ATTR_ALWAYS_INLINE { - char_u *p; - int cc; - - if (!ireg_ic - || (!enc_utf8 && mb_char2len(c) > 1) - ) + if (!ireg_ic) { return vim_strchr(s, c); + } + + // tolower() and toupper() can be slow, comparing twice should be a lot + // faster (esp. when using MS Visual C++!). + // For UTF-8 need to use folded case. + if (c > 0x80) { + const int folded_c = utf_fold(c); + for (const char_u *p = s; *p != NUL; p += utfc_ptr2len(p)) { + if (utf_fold(utf_ptr2char(p)) == folded_c) { + return (char_u *)p; + } + } + return NULL; + } - /* tolower() and toupper() can be slow, comparing twice should be a lot - * faster (esp. when using MS Visual C++!). - * For UTF-8 need to use folded case. */ - if (enc_utf8 && c > 0x80) - cc = utf_fold(c); - else if (vim_isupper(c)) + int cc; + if (vim_isupper(c)) { cc = vim_tolower(c); - else if (vim_islower(c)) + } else if (vim_islower(c)) { cc = vim_toupper(c); - else + } else { return vim_strchr(s, c); + } - if (has_mbyte) { - for (p = s; *p != NUL; p += (*mb_ptr2len)(p)) { - if (enc_utf8 && c > 0x80) { - if (utf_fold(utf_ptr2char(p)) == cc) - return p; - } else if (*p == c || *p == cc) - return p; - } - } else - /* Faster version for when there are no multi-byte characters. */ - for (p = s; *p != NUL; ++p) - if (*p == c || *p == cc) - return p; - - return NULL; + char tofind[] = { (char)c, (char)cc, NUL }; + return (char_u *)strpbrk((const char *)s, tofind); } /*************************************************************** -- cgit From caeff6e1aff227bb5826ad575362d2a24adebaa9 Mon Sep 17 00:00:00 2001 From: ZyX Date: Fri, 7 Apr 2017 23:18:36 +0300 Subject: regexp: Do not use locale-dependent functions in cstrchr --- src/nvim/regexp.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/nvim/regexp.c b/src/nvim/regexp.c index 175aa1b970..893089f06d 100644 --- a/src/nvim/regexp.c +++ b/src/nvim/regexp.c @@ -6309,10 +6309,10 @@ static inline char_u *cstrchr(const char_u *const s, const int c) } int cc; - if (vim_isupper(c)) { - cc = vim_tolower(c); - } else if (vim_islower(c)) { - cc = vim_toupper(c); + if (ASCII_ISUPPER(c)) { + cc = TOLOWER_ASC(c); + } else if (ASCII_ISLOWER(c)) { + cc = TOUPPER_ASC(c); } else { return vim_strchr(s, c); } -- cgit From acc52a953b99f78435c34337b8ca9b6716a057a1 Mon Sep 17 00:00:00 2001 From: ZyX Date: Sat, 8 Apr 2017 02:55:51 +0300 Subject: regexp: Update comment in cstrchr() --- src/nvim/regexp.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/nvim/regexp.c b/src/nvim/regexp.c index 893089f06d..96884aa87f 100644 --- a/src/nvim/regexp.c +++ b/src/nvim/regexp.c @@ -6295,9 +6295,8 @@ static inline char_u *cstrchr(const char_u *const s, const int c) return vim_strchr(s, c); } - // tolower() and toupper() can be slow, comparing twice should be a lot - // faster (esp. when using MS Visual C++!). - // For UTF-8 need to use folded case. + // Use folded case for UTF-8, slow! For ASCII use libc strpbrk which is + // expected to be highly optimized. if (c > 0x80) { const int folded_c = utf_fold(c); for (const char_u *p = s; *p != NUL; p += utfc_ptr2len(p)) { -- cgit