From 37fe5aa44452d351b6c0ad6a4b2aa75f10540d22 Mon Sep 17 00:00:00 2001
From: oni-link
Date: Thu, 22 May 2014 13:13:00 +0200
Subject: vim-patch:7.4.292 #754
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Problem:    Searching for "a" does not match accented "a" with new regexp
            engine, does match with old engine. (David Bürgin)
            "ca" does not match "ca" with accented "a" with either engine.
Solution:   Change the old engine, check for following composing character
            also for single-byte patterns.

https://code.google.com/p/vim/source/detail?r=60cdaa05a6ad31cef55eb6b3dc1f57ecac6fcf79
---
 src/nvim/regexp.c | 37 ++++++++++++++++++++-----------------
 1 file changed, 20 insertions(+), 17 deletions(-)

(limited to 'src/nvim/regexp.c')

diff --git a/src/nvim/regexp.c b/src/nvim/regexp.c
index 8c0652dd01..b37fb1e39d 100644
--- a/src/nvim/regexp.c
+++ b/src/nvim/regexp.c
@@ -4093,25 +4093,28 @@ regmatch (
         else if (*opnd == NUL) {
           /* match empty string always works; happens when "~" is
            * empty. */
-        } else if (opnd[1] == NUL
-                   && !(enc_utf8 && ireg_ic)
-                   )
-          ++reginput; /* matched a single char */
-        else {
-          len = (int)STRLEN(opnd);
-          /* Need to match first byte again for multi-byte. */
-          if (cstrncmp(opnd, reginput, &len) != 0)
-            status = RA_NOMATCH;
-          /* Check for following composing character. */
-          else if (enc_utf8
-                   && UTF_COMPOSINGLIKE(reginput, reginput + len)) {
-            /* raaron: This code makes a composing character get
-             * ignored, which is the correct behavior (sometimes)
-             * for voweled Hebrew texts. */
-            if (!ireg_icombine)
+        } else {
+          if (opnd[1] == NUL && !(enc_utf8 && ireg_ic)) {
+            len = 1; /* matched a single byte above */
+          } else {
+            // Need to match first byte again for multi-byte.
+            len = (int)STRLEN(opnd);
+            if (cstrncmp(opnd, reginput, &len) != 0) {
               status = RA_NOMATCH;
-          } else
+            }
+          }
+          // Check for following composing character.
+          if (status != RA_NOMATCH && enc_utf8
+              && UTF_COMPOSINGLIKE(reginput, reginput + len)
+              && !ireg_icombine) {
+            // raaron: This code makes a composing character get
+            // ignored, which is the correct behavior (sometimes)
+            // for voweled Hebrew texts.
+            status = RA_NOMATCH;
+          }
+          if (status != RA_NOMATCH) {
             reginput += len;
+          }
         }
       }
       break;
-- 
cgit