diff options
Diffstat (limited to 'src/nvim/regexp.c')
| -rw-r--r-- | src/nvim/regexp.c | 111 | 
1 files changed, 45 insertions, 66 deletions
| diff --git a/src/nvim/regexp.c b/src/nvim/regexp.c index 4b5e17b00b..5448cc7131 100644 --- a/src/nvim/regexp.c +++ b/src/nvim/regexp.c @@ -1,3 +1,6 @@ +// This is an open source non-commercial project. Dear PVS-Studio, please check +// it. PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com +  /*   * Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub()   * @@ -2398,7 +2401,7 @@ collection:                regc('\b');                break;              case CLASS_ESCAPE: -              regc('\033'); +              regc(ESC);                break;              }            } else { @@ -2923,13 +2926,8 @@ static void skipchr(void)    else      prevchr_len = 0;    if (regparse[prevchr_len] != NUL) { -    if (enc_utf8) -      /* exclude composing chars that mb_ptr2len does include */ -      prevchr_len += utf_ptr2len(regparse + prevchr_len); -    else if (has_mbyte) -      prevchr_len += (*mb_ptr2len)(regparse + prevchr_len); -    else -      ++prevchr_len; +    // Exclude composing chars that utfc_ptr2len does include. +    prevchr_len += utf_ptr2len(regparse + prevchr_len);    }    regparse += prevchr_len;    prev_at_start = at_start; @@ -3052,7 +3050,7 @@ static int getoctchrs(void)    int c;    int i; -  for (i = 0; i < 3 && nr < 040; ++i) { +  for (i = 0; i < 3 && nr < 040; i++) {  // -V536      c = regparse[0];      if (c < '0' || c > '7')        break; @@ -3429,32 +3427,26 @@ static long bt_regexec_both(char_u *line,        c = *prog->regmust;      s = line + col; -    /* -     * This is used very often, esp. for ":global".  Use three versions of -     * the loop to avoid overhead of conditions. -     */ -    if (!ireg_ic -        && !has_mbyte -        ) -      while ((s = vim_strbyte(s, c)) != NULL) { -        if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0) -          break;                        /* Found it. */ -        ++s; -      } -    else if (!ireg_ic || (!enc_utf8 && mb_char2len(c) > 1)) +    // This is used very often, esp. for ":global".  Use two versions of +    // the loop to avoid overhead of conditions. +    if (!ireg_ic) {        while ((s = vim_strchr(s, c)) != NULL) { -        if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0) -          break;                        /* Found it. */ +        if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0) { +          break;  // Found it. +        }          mb_ptr_adv(s);        } -    else +    } else {        while ((s = cstrchr(s, c)) != NULL) { -        if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0) -          break;                        /* Found it. */ +        if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0) { +          break;  // Found it. +        }          mb_ptr_adv(s);        } -    if (s == NULL)              /* Not present. */ +    } +    if (s == NULL) {  // Not present.        goto theend; +    }    }    regline = line; @@ -3484,14 +3476,8 @@ static long bt_regexec_both(char_u *line,      /* Messy cases:  unanchored match. */      while (!got_int) {        if (prog->regstart != NUL) { -        /* Skip until the char we know it must start with. -         * Used often, do some work to avoid call overhead. */ -        if (!ireg_ic -            && !has_mbyte -            ) -          s = vim_strbyte(regline + col, prog->regstart); -        else -          s = cstrchr(regline + col, prog->regstart); +        // Skip until the char we know it must start with. +        s = cstrchr(regline + col, prog->regstart);          if (s == NULL) {            retval = 0;            break; @@ -6301,44 +6287,37 @@ static int cstrncmp(char_u *s1, char_u *s2, int *n)  /*   * cstrchr: This function is used a lot for simple searches, keep it fast!   */ -static char_u *cstrchr(char_u *s, int c) +static inline char_u *cstrchr(const char_u *const s, const int c) +  FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL +  FUNC_ATTR_ALWAYS_INLINE  { -  char_u      *p; -  int cc; - -  if (!ireg_ic -      || (!enc_utf8 && mb_char2len(c) > 1) -      ) +  if (!ireg_ic) {      return vim_strchr(s, c); +  } -  /* tolower() and toupper() can be slow, comparing twice should be a lot -   * faster (esp. when using MS Visual C++!). -   * For UTF-8 need to use folded case. */ +  // Use folded case for UTF-8, slow! For ASCII use libc strpbrk which is +  // expected to be highly optimized.    if (c > 0x80) { -    cc = utf_fold(c); -  } else if (mb_isupper(c)) { -    cc = mb_tolower(c); -  } else if (mb_islower(c)) { -    cc = mb_toupper(c); +    const int folded_c = utf_fold(c); +    for (const char_u *p = s; *p != NUL; p += utfc_ptr2len(p)) { +      if (utf_fold(utf_ptr2char(p)) == folded_c) { +        return (char_u *)p; +      } +    } +    return NULL; +  } + +  int cc; +  if (ASCII_ISUPPER(c)) { +    cc = TOLOWER_ASC(c); +  } else if (ASCII_ISLOWER(c)) { +    cc = TOUPPER_ASC(c);    } else {      return vim_strchr(s, c);    } -  if (has_mbyte) { -    for (p = s; *p != NUL; p += (*mb_ptr2len)(p)) { -      if (enc_utf8 && c > 0x80) { -        if (utf_fold(utf_ptr2char(p)) == cc) -          return p; -      } else if (*p == c || *p == cc) -        return p; -    } -  } else -    /* Faster version for when there are no multi-byte characters. */ -    for (p = s; *p != NUL; ++p) -      if (*p == c || *p == cc) -        return p; - -  return NULL; +  char tofind[] = { (char)c, (char)cc, NUL }; +  return (char_u *)strpbrk((const char *)s, tofind);  }  /*************************************************************** | 
