diff options
Diffstat (limited to 'src/nvim/regexp.c')
| -rw-r--r-- | src/nvim/regexp.c | 26 | 
1 file changed, 18 insertions, 8 deletions
diff --git a/src/nvim/regexp.c b/src/nvim/regexp.c index fd1b6116f2..193c68860d 100644 --- a/src/nvim/regexp.c +++ b/src/nvim/regexp.c @@ -258,6 +258,7 @@  #define RE_MARK         207     /* mark cmp  Match mark position */  #define RE_VISUAL       208     /*	Match Visual area */ +#define RE_COMPOSING    209     // any composing characters  /*   * Magic characters have a special meaning, they don't match literally. @@ -1256,12 +1257,6 @@ static regprog_T *bt_regcomp(char_u *expr, int re_flags)    if (reg(REG_NOPAREN, &flags) == NULL)      return NULL; -  /* Small enough for pointer-storage convention? */ -#ifdef SMALL_MALLOC             /* 16 bit storage allocation */ -  if (regsize >= 65536L - 256L) -    EMSG_RET_NULL(_("E339: Pattern too long")); -#endif -    /* Allocate space. */    bt_regprog_T *r = xmalloc(sizeof(bt_regprog_T) + regsize); @@ -2030,6 +2025,10 @@ static char_u *regatom(int *flagp)        ret = regnode(RE_VISUAL);        break; +    case 'C': +      ret = regnode(RE_COMPOSING); +      break; +      /* \%[abc]: Emit as a list of branches, all ending at the last       * branch which matches nothing. */      case '[': @@ -4105,10 +4104,12 @@ regmatch (                  status = RA_NOMATCH;                }              } -            // Check for following composing character. +            // Check for following composing character, unless %C +            // follows (skips over all composing chars).              if (status != RA_NOMATCH && enc_utf8                  && UTF_COMPOSINGLIKE(reginput, reginput + len) -                && !ireg_icombine) { +                && !ireg_icombine +                && OP(next) != RE_COMPOSING) {                // raaron: This code makes a composing character get                // ignored, which is the correct behavior (sometimes)                // for voweled Hebrew texts. 
@@ -4173,6 +4174,15 @@ regmatch (              status = RA_NOMATCH;            break; +        case RE_COMPOSING: +          if (enc_utf8) { +            // Skip composing characters. +            while (utf_iscomposing(utf_ptr2char(reginput))) { +              mb_cptr_adv(reginput); +            } +          } +          break; +          case NOTHING:            break;  | 
