aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBjörn Linse <bjorn.linse@gmail.com>2017-04-09 09:29:00 +0200
committerBjörn Linse <bjorn.linse@gmail.com>2017-04-10 12:02:26 +0200
commit4c857dae1169f3b8c7b6a9740f50acfd3c16858d (patch)
tree2368cea5211cd31269b2cc3c5b7e31147e9f268e
parenta3a06d02489a7a43cdd7909c92c79ef7971c8663 (diff)
downloadrneovim-4c857dae1169f3b8c7b6a9740f50acfd3c16858d.tar.gz
rneovim-4c857dae1169f3b8c7b6a9740f50acfd3c16858d.tar.bz2
rneovim-4c857dae1169f3b8c7b6a9740f50acfd3c16858d.zip
vim-patch:8.0.0243
Problem: When making a character lower case with tolower() changes the byte count, it is not made lower case. Solution: Add strlow_save(). (Dominique Pelle, closes vim/vim#1406) https://github.com/vim/vim/commit/cc5b22b3bfdc0e9e835cf7871166badda31447bd Join the almost identical strup_save and strlow_save functions into one function, strcase_save().
-rw-r--r--src/nvim/eval.c26
-rw-r--r--src/nvim/strings.c51
-rw-r--r--src/nvim/testdir/test_functions.vim144
3 files changed, 168 insertions, 53 deletions
diff --git a/src/nvim/eval.c b/src/nvim/eval.c
index c1ee33b06a..ed07de51c8 100644
--- a/src/nvim/eval.c
+++ b/src/nvim/eval.c
@@ -16791,30 +16791,8 @@ void timer_teardown(void)
*/
static void f_tolower(typval_T *argvars, typval_T *rettv, FunPtr fptr)
{
- char_u *p = (char_u *)xstrdup(tv_get_string(&argvars[0]));
rettv->v_type = VAR_STRING;
- rettv->vval.v_string = p;
-
- while (*p != NUL) {
- int l;
-
- if (enc_utf8) {
- int c, lc;
-
- c = utf_ptr2char(p);
- lc = mb_tolower(c);
- l = utf_ptr2len(p);
- /* TODO: reallocate string when byte count changes. */
- if (utf_char2len(lc) == l)
- utf_char2bytes(lc, p);
- p += l;
- } else if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
- p += l; /* skip multi-byte character */
- else {
- *p = TOLOWER_LOC(*p); /* note that tolower() can be a macro */
- ++p;
- }
- }
+ rettv->vval.v_string = (char_u *)strcase_save(tv_get_string(&argvars[0]), false);
}
/*
@@ -16823,7 +16801,7 @@ static void f_tolower(typval_T *argvars, typval_T *rettv, FunPtr fptr)
static void f_toupper(typval_T *argvars, typval_T *rettv, FunPtr fptr)
{
rettv->v_type = VAR_STRING;
- rettv->vval.v_string = (char_u *)strup_save(tv_get_string(&argvars[0]));
+ rettv->vval.v_string = (char_u *)strcase_save(tv_get_string(&argvars[0]), true);
}
/*
diff --git a/src/nvim/strings.c b/src/nvim/strings.c
index e7c0fb8a7d..87e066d80a 100644
--- a/src/nvim/strings.c
+++ b/src/nvim/strings.c
@@ -291,14 +291,15 @@ void vim_strup(char_u *p)
}
}
-/// Make given string all upper-case
+/// Make given string all upper-case or all lower-case
///
-/// Handels multi-byte characters as good as possible.
+/// Handles multi-byte characters as good as possible.
///
/// @param[in] orig Input string.
+/// @param[in] upper If true make uppercase, otherwise lowercase
///
/// @return [allocated] upper-cased or lower-cased string.
-char *strup_save(const char *const orig)
+char *strcase_save(const char *const orig, bool upper)
FUNC_ATTR_NONNULL_RET FUNC_ATTR_MALLOC FUNC_ATTR_NONNULL_ALL
{
char *res = xstrdup(orig);
@@ -307,33 +308,25 @@ char *strup_save(const char *const orig)
while (*p != NUL) {
int l;
- if (enc_utf8) {
- int c = utf_ptr2char((const char_u *)p);
- int uc = mb_toupper(c);
-
- // Reallocate string when byte count changes. This is rare,
- // thus it's OK to do another malloc()/free().
- l = utf_ptr2len((const char_u *)p);
- int newl = utf_char2len(uc);
- if (newl != l) {
- // TODO(philix): use xrealloc() in strup_save()
- char *s = xmalloc(STRLEN(res) + (size_t)(1 + newl - l));
- memcpy(s, res, (size_t)(p - res));
- STRCPY(s + (p - res) + newl, p + l);
- p = s + (p - res);
- xfree(res);
- res = s;
- }
-
- utf_char2bytes(uc, (char_u *)p);
- p += newl;
- } else if (has_mbyte && (l = (*mb_ptr2len)((const char_u *)p)) > 1) {
- p += l; // Skip multi-byte character.
- } else {
- // note that toupper() can be a macro
- *p = (char)(uint8_t)TOUPPER_LOC(*p);
- p++;
+ int c = utf_ptr2char((const char_u *)p);
+ int uc = upper ? mb_toupper(c) : mb_tolower(c);
+
+ // Reallocate string when byte count changes. This is rare,
+ // thus it's OK to do another malloc()/free().
+ l = utf_ptr2len((const char_u *)p);
+ int newl = utf_char2len(uc);
+ if (newl != l) {
+ // TODO(philix): use xrealloc() in strcase_save()
+ char *s = xmalloc(STRLEN(res) + (size_t)(1 + newl - l));
+ memcpy(s, res, (size_t)(p - res));
+ STRCPY(s + (p - res) + newl, p + l);
+ p = s + (p - res);
+ xfree(res);
+ res = s;
}
+
+ utf_char2bytes(uc, (char_u *)p);
+ p += newl;
}
return res;
diff --git a/src/nvim/testdir/test_functions.vim b/src/nvim/testdir/test_functions.vim
index 81cb6314ce..3c258299c1 100644
--- a/src/nvim/testdir/test_functions.vim
+++ b/src/nvim/testdir/test_functions.vim
@@ -29,3 +29,147 @@ func Test_setbufvar_options()
bwipe!
endfunc
+" Checks tolower() on the empty string, on all printable ASCII characters,
+" on accented Latin letters, and on the two code points whose UTF-8
+" encoding grows when they are lowercased.
+func Test_tolower()
+ call assert_equal("", tolower(""))
+
+ " Test with all printable ASCII characters.
+ call assert_equal(' !"#$%&''()*+,-./0123456789:;<=>?@abcdefghijklmnopqrstuvwxyz[\]^_`abcdefghijklmnopqrstuvwxyz{|}~',
+ \ tolower(' !"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'))
+
+ " The remaining checks use non-ASCII text, so stop here when Vim was
+ " built without multi-byte support.
+ if !has('multi_byte')
+ return
+ endif
+
+ " Test with a few uppercase diacritics.
+ call assert_equal("aàáâãäåāăąǎǟǡả", tolower("AÀÁÂÃÄÅĀĂĄǍǞǠẢ"))
+ call assert_equal("bḃḇ", tolower("BḂḆ"))
+ call assert_equal("cçćĉċč", tolower("CÇĆĈĊČ"))
+ call assert_equal("dďđḋḏḑ", tolower("DĎĐḊḎḐ"))
+ call assert_equal("eèéêëēĕėęěẻẽ", tolower("EÈÉÊËĒĔĖĘĚẺẼ"))
+ call assert_equal("fḟ ", tolower("FḞ "))
+ call assert_equal("gĝğġģǥǧǵḡ", tolower("GĜĞĠĢǤǦǴḠ"))
+ call assert_equal("hĥħḣḧḩ", tolower("HĤĦḢḦḨ"))
+ " Note: İ (capital I with dot above) is expected to become a plain "i".
+ call assert_equal("iìíîïĩīĭįiǐỉ", tolower("IÌÍÎÏĨĪĬĮİǏỈ"))
+ call assert_equal("jĵ", tolower("JĴ"))
+ call assert_equal("kķǩḱḵ", tolower("KĶǨḰḴ"))
+ call assert_equal("lĺļľŀłḻ", tolower("LĹĻĽĿŁḺ"))
+ call assert_equal("mḿṁ", tolower("MḾṀ"))
+ call assert_equal("nñńņňṅṉ", tolower("NÑŃŅŇṄṈ"))
+ call assert_equal("oòóôõöøōŏőơǒǫǭỏ", tolower("OÒÓÔÕÖØŌŎŐƠǑǪǬỎ"))
+ call assert_equal("pṕṗ", tolower("PṔṖ"))
+ call assert_equal("q", tolower("Q"))
+ call assert_equal("rŕŗřṙṟ", tolower("RŔŖŘṘṞ"))
+ call assert_equal("sśŝşšṡ", tolower("SŚŜŞŠṠ"))
+ call assert_equal("tţťŧṫṯ", tolower("TŢŤŦṪṮ"))
+ call assert_equal("uùúûüũūŭůűųưǔủ", tolower("UÙÚÛÜŨŪŬŮŰŲƯǓỦ"))
+ call assert_equal("vṽ", tolower("VṼ"))
+ call assert_equal("wŵẁẃẅẇ", tolower("WŴẀẂẄẆ"))
+ call assert_equal("xẋẍ", tolower("XẊẌ"))
+ call assert_equal("yýŷÿẏỳỷỹ", tolower("YÝŶŸẎỲỶỸ"))
+ call assert_equal("zźżžƶẑẕ", tolower("ZŹŻŽƵẐẔ"))
+
+ " Test with a few lowercase diacritics, which should remain unchanged.
+ call assert_equal("aàáâãäåāăąǎǟǡả", tolower("aàáâãäåāăąǎǟǡả"))
+ call assert_equal("bḃḇ", tolower("bḃḇ"))
+ call assert_equal("cçćĉċč", tolower("cçćĉċč"))
+ call assert_equal("dďđḋḏḑ", tolower("dďđḋḏḑ"))
+ call assert_equal("eèéêëēĕėęěẻẽ", tolower("eèéêëēĕėęěẻẽ"))
+ call assert_equal("fḟ", tolower("fḟ"))
+ call assert_equal("gĝğġģǥǧǵḡ", tolower("gĝğġģǥǧǵḡ"))
+ call assert_equal("hĥħḣḧḩẖ", tolower("hĥħḣḧḩẖ"))
+ call assert_equal("iìíîïĩīĭįǐỉ", tolower("iìíîïĩīĭįǐỉ"))
+ call assert_equal("jĵǰ", tolower("jĵǰ"))
+ call assert_equal("kķǩḱḵ", tolower("kķǩḱḵ"))
+ call assert_equal("lĺļľŀłḻ", tolower("lĺļľŀłḻ"))
+ call assert_equal("mḿṁ ", tolower("mḿṁ "))
+ call assert_equal("nñńņňʼnṅṉ", tolower("nñńņňʼnṅṉ"))
+ call assert_equal("oòóôõöøōŏőơǒǫǭỏ", tolower("oòóôõöøōŏőơǒǫǭỏ"))
+ call assert_equal("pṕṗ", tolower("pṕṗ"))
+ call assert_equal("q", tolower("q"))
+ call assert_equal("rŕŗřṙṟ", tolower("rŕŗřṙṟ"))
+ call assert_equal("sśŝşšṡ", tolower("sśŝşšṡ"))
+ call assert_equal("tţťŧṫṯẗ", tolower("tţťŧṫṯẗ"))
+ call assert_equal("uùúûüũūŭůűųưǔủ", tolower("uùúûüũūŭůűųưǔủ"))
+ call assert_equal("vṽ", tolower("vṽ"))
+ call assert_equal("wŵẁẃẅẇẘ", tolower("wŵẁẃẅẇẘ"))
+ call assert_equal("ẋẍ", tolower("ẋẍ"))
+ call assert_equal("yýÿŷẏẙỳỷỹ", tolower("yýÿŷẏẙỳỷỹ"))
+ call assert_equal("zźżžƶẑẕ", tolower("zźżžƶẑẕ"))
+
+ " According to https://twitter.com/jifa/status/625776454479970304
+ " Ⱥ (U+023A) and Ⱦ (U+023E) are the *only* code points to increase
+ " in length (2 to 3 bytes) when lowercased. So let's test them.
+ call assert_equal("ⱥ ⱦ", tolower("Ⱥ Ⱦ"))
+endfunc
+
+" Checks toupper() on the empty string, on all printable ASCII characters,
+" on accented Latin letters, and on the two code points whose UTF-8
+" encoding shrinks when they are uppercased.
+func Test_toupper()
+ call assert_equal("", toupper(""))
+
+ " Test with all printable ASCII characters.
+ call assert_equal(' !"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~',
+ \ toupper(' !"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'))
+
+ " The remaining checks use non-ASCII text, so stop here when Vim was
+ " built without multi-byte support.
+ if !has('multi_byte')
+ return
+ endif
+
+ " Test with a few lowercase diacritics.
+ call assert_equal("AÀÁÂÃÄÅĀĂĄǍǞǠẢ", toupper("aàáâãäåāăąǎǟǡả"))
+ call assert_equal("BḂḆ", toupper("bḃḇ"))
+ call assert_equal("CÇĆĈĊČ", toupper("cçćĉċč"))
+ call assert_equal("DĎĐḊḎḐ", toupper("dďđḋḏḑ"))
+ call assert_equal("EÈÉÊËĒĔĖĘĚẺẼ", toupper("eèéêëēĕėęěẻẽ"))
+ call assert_equal("FḞ", toupper("fḟ"))
+ call assert_equal("GĜĞĠĢǤǦǴḠ", toupper("gĝğġģǥǧǵḡ"))
+ call assert_equal("HĤĦḢḦḨẖ", toupper("hĥħḣḧḩẖ"))
+ call assert_equal("IÌÍÎÏĨĪĬĮǏỈ", toupper("iìíîïĩīĭįǐỉ"))
+ call assert_equal("JĴǰ", toupper("jĵǰ"))
+ call assert_equal("KĶǨḰḴ", toupper("kķǩḱḵ"))
+ call assert_equal("LĹĻĽĿŁḺ", toupper("lĺļľŀłḻ"))
+ call assert_equal("MḾṀ ", toupper("mḿṁ "))
+ call assert_equal("NÑŃŅŇʼnṄṈ", toupper("nñńņňʼnṅṉ"))
+ call assert_equal("OÒÓÔÕÖØŌŎŐƠǑǪǬỎ", toupper("oòóôõöøōŏőơǒǫǭỏ"))
+ call assert_equal("PṔṖ", toupper("pṕṗ"))
+ call assert_equal("Q", toupper("q"))
+ call assert_equal("RŔŖŘṘṞ", toupper("rŕŗřṙṟ"))
+ call assert_equal("SŚŜŞŠṠ", toupper("sśŝşšṡ"))
+ call assert_equal("TŢŤŦṪṮẗ", toupper("tţťŧṫṯẗ"))
+ call assert_equal("UÙÚÛÜŨŪŬŮŰŲƯǓỦ", toupper("uùúûüũūŭůűųưǔủ"))
+ call assert_equal("VṼ", toupper("vṽ"))
+ call assert_equal("WŴẀẂẄẆẘ", toupper("wŵẁẃẅẇẘ"))
+ call assert_equal("ẊẌ", toupper("ẋẍ"))
+ call assert_equal("YÝŸŶẎẙỲỶỸ", toupper("yýÿŷẏẙỳỷỹ"))
+ call assert_equal("ZŹŻŽƵẐẔ", toupper("zźżžƶẑẕ"))
+
+ " Test with uppercase diacritics, which should remain unchanged.
+ call assert_equal("AÀÁÂÃÄÅĀĂĄǍǞǠẢ", toupper("AÀÁÂÃÄÅĀĂĄǍǞǠẢ"))
+ call assert_equal("BḂḆ", toupper("BḂḆ"))
+ call assert_equal("CÇĆĈĊČ", toupper("CÇĆĈĊČ"))
+ call assert_equal("DĎĐḊḎḐ", toupper("DĎĐḊḎḐ"))
+ call assert_equal("EÈÉÊËĒĔĖĘĚẺẼ", toupper("EÈÉÊËĒĔĖĘĚẺẼ"))
+ call assert_equal("FḞ ", toupper("FḞ "))
+ call assert_equal("GĜĞĠĢǤǦǴḠ", toupper("GĜĞĠĢǤǦǴḠ"))
+ call assert_equal("HĤĦḢḦḨ", toupper("HĤĦḢḦḨ"))
+ call assert_equal("IÌÍÎÏĨĪĬĮİǏỈ", toupper("IÌÍÎÏĨĪĬĮİǏỈ"))
+ call assert_equal("JĴ", toupper("JĴ"))
+ call assert_equal("KĶǨḰḴ", toupper("KĶǨḰḴ"))
+ call assert_equal("LĹĻĽĿŁḺ", toupper("LĹĻĽĿŁḺ"))
+ call assert_equal("MḾṀ", toupper("MḾṀ"))
+ call assert_equal("NÑŃŅŇṄṈ", toupper("NÑŃŅŇṄṈ"))
+ call assert_equal("OÒÓÔÕÖØŌŎŐƠǑǪǬỎ", toupper("OÒÓÔÕÖØŌŎŐƠǑǪǬỎ"))
+ call assert_equal("PṔṖ", toupper("PṔṖ"))
+ call assert_equal("Q", toupper("Q"))
+ call assert_equal("RŔŖŘṘṞ", toupper("RŔŖŘṘṞ"))
+ call assert_equal("SŚŜŞŠṠ", toupper("SŚŜŞŠṠ"))
+ call assert_equal("TŢŤŦṪṮ", toupper("TŢŤŦṪṮ"))
+ call assert_equal("UÙÚÛÜŨŪŬŮŰŲƯǓỦ", toupper("UÙÚÛÜŨŪŬŮŰŲƯǓỦ"))
+ call assert_equal("VṼ", toupper("VṼ"))
+ call assert_equal("WŴẀẂẄẆ", toupper("WŴẀẂẄẆ"))
+ call assert_equal("XẊẌ", toupper("XẊẌ"))
+ call assert_equal("YÝŶŸẎỲỶỸ", toupper("YÝŶŸẎỲỶỸ"))
+ call assert_equal("ZŹŻŽƵẐẔ", toupper("ZŹŻŽƵẐẔ"))
+
+ " ⱥ (U+2C65) and ⱦ (U+2C66) shrink from 3 to 2 bytes when uppercased to
+ " Ⱥ (U+023A) and Ⱦ (U+023E), so this exercises the byte-count-changing
+ " path of toupper() (the original line re-tested tolower() by mistake).
+ call assert_equal("Ⱥ Ⱦ", toupper("ⱥ ⱦ"))
+endfunc
+
+