diff options
-rw-r--r-- | runtime/doc/change.txt | 13 | ||||
-rw-r--r-- | runtime/doc/eval.txt | 7 | ||||
-rw-r--r-- | src/nvim/eval/funcs.c | 11 | ||||
-rw-r--r-- | src/nvim/ex_cmds.c | 18 | ||||
-rw-r--r-- | src/nvim/testdir/test_sort.vim | 71 |
5 files changed, 109 insertions, 11 deletions
diff --git a/runtime/doc/change.txt b/runtime/doc/change.txt index 310d244fbc..bdef544b2a 100644 --- a/runtime/doc/change.txt +++ b/runtime/doc/change.txt @@ -1742,7 +1742,7 @@ Vim has a sorting function and a sorting command. The sorting function can be found here: |sort()|, |uniq()|. *:sor* *:sort* -:[range]sor[t][!] [b][f][i][n][o][r][u][x] [/{pattern}/] +:[range]sor[t][!] [b][f][i][l][n][o][r][u][x] [/{pattern}/] Sort lines in [range]. When no range is given all lines are sorted. @@ -1750,6 +1750,14 @@ found here: |sort()|, |uniq()|. With [i] case is ignored. + With [l] sort uses the current locale. See + `language collate` to check or set the locale used + for ordering. For example, with "en_US.UTF8", + Ö will be ordered after O and before P, + whereas with the Swedish locale "sv_SE.UTF8", + it will be after Z. + Case is typically ignored by the locale. + Options [n][f][x][o][b] are mutually exclusive. With [n] sorting is done on the first decimal number @@ -1816,8 +1824,7 @@ found here: |sort()|, |uniq()|. Note that using `:sort` with `:global` doesn't sort the matching lines, it's quite useless. -The details about sorting depend on the library function used. There is no -guarantee that sorting obeys the current locale. You will have to try it out. +`:sort` does not use the current locale unless the l flag is used. Vim does do a "stable" sort. The sorting can be interrupted, but if you interrupt it too late in the diff --git a/runtime/doc/eval.txt b/runtime/doc/eval.txt index 01803041a8..2aff9fc127 100644 --- a/runtime/doc/eval.txt +++ b/runtime/doc/eval.txt @@ -8359,6 +8359,13 @@ sort({list} [, {func} [, {dict}]]) *sort()* *E702* When {func} is given and it is '1' or 'i' then case is ignored. + When {func} is given and it is 'l' then the current locale + is used for ordering. See `language collate` to check or set + the locale used for ordering. For example, with "en_US.UTF8", + Ö will be ordered after O and before P, whereas with the + Swedish locale "sv_SE.UTF8", it will be after Z. + Case is typically ignored by the locale. + When {func} is given and it is 'n' then all items will be sorted numerical (Implementation detail: This uses the strtod() function to parse numbers, Strings, Lists, Dicts and diff --git a/src/nvim/eval/funcs.c b/src/nvim/eval/funcs.c index 0d288e2cc2..a24efd93ea 100644 --- a/src/nvim/eval/funcs.c +++ b/src/nvim/eval/funcs.c @@ -9166,6 +9166,7 @@ static void f_sockconnect(typval_T *argvars, typval_T *rettv, FunPtr fptr) /// struct storing information about current sort typedef struct { int item_compare_ic; + bool item_compare_lc; bool item_compare_numeric; bool item_compare_numbers; bool item_compare_float; @@ -9240,10 +9241,10 @@ static int item_compare(const void *s1, const void *s2, bool keep_zero) p2 = ""; } if (!sortinfo->item_compare_numeric) { - if (sortinfo->item_compare_ic) { - res = STRICMP(p1, p2); + if (sortinfo->item_compare_lc) { + res = strcoll(p1, p2); } else { - res = STRCMP(p1, p2); + res = sortinfo->item_compare_ic ? STRICMP(p1, p2): STRCMP(p1, p2); } } else { double n1, n2; @@ -9378,6 +9379,7 @@ static void do_sort_uniq(typval_T *argvars, typval_T *rettv, bool sort) } info.item_compare_ic = false; + info.item_compare_lc = false; info.item_compare_numeric = false; info.item_compare_numbers = false; info.item_compare_float = false; @@ -9422,6 +9424,9 @@ static void do_sort_uniq(typval_T *argvars, typval_T *rettv, bool sort) } else if (strcmp(info.item_compare_func, "i") == 0) { info.item_compare_func = NULL; info.item_compare_ic = true; + } else if (strcmp(info.item_compare_func, "l") == 0) { + info.item_compare_func = NULL; + info.item_compare_lc = true; } } } diff --git a/src/nvim/ex_cmds.c b/src/nvim/ex_cmds.c index 3e330b88a2..349c7d6280 100644 --- a/src/nvim/ex_cmds.c +++ b/src/nvim/ex_cmds.c @@ -358,6 +358,7 @@ static int linelen(int *has_tab) static char_u *sortbuf1; static char_u *sortbuf2; +static int sort_lc; ///< sort using locale static int sort_ic; ///< ignore case static int sort_nr; ///< sort on number static int sort_rx; ///< sort on regex instead of skipping it @@ -381,6 +382,13 @@ typedef struct { } st_u; } sorti_T; +static int string_compare(const void *s1, const void *s2) FUNC_ATTR_NONNULL_ALL +{ + if (sort_lc) { + return strcoll((char *)s1, (char *)s2); + } + return sort_ic ? STRICMP(s1, s2) : STRCMP(s1, s2); +} static int sort_compare(const void *s1, const void *s2) { @@ -424,8 +432,7 @@ static int sort_compare(const void *s1, const void *s2) l2.st_u.line.end_col_nr - l2.st_u.line.start_col_nr + 1); sortbuf2[l2.st_u.line.end_col_nr - l2.st_u.line.start_col_nr] = NUL; - result = sort_ic ? STRICMP(sortbuf1, sortbuf2) - : STRCMP(sortbuf1, sortbuf2); + result = string_compare(sortbuf1, sortbuf2); } /* If two lines have the same value, preserve the original line order. */ @@ -466,7 +473,7 @@ void ex_sort(exarg_T *eap) regmatch.regprog = NULL; sorti_T *nrs = xmalloc(count * sizeof(sorti_T)); - sort_abort = sort_ic = sort_rx = sort_nr = sort_flt = 0; + sort_abort = sort_ic = sort_lc = sort_rx = sort_nr = sort_flt = 0; size_t format_found = 0; bool change_occurred = false; // Buffer contents changed. @@ -474,6 +481,8 @@ void ex_sort(exarg_T *eap) if (ascii_iswhite(*p)) { } else if (*p == 'i') { sort_ic = true; + } else if (*p == 'l') { + sort_lc = true; } else if (*p == 'r') { sort_rx = true; } else if (*p == 'n') { @@ -645,8 +654,7 @@ void ex_sort(exarg_T *eap) s = ml_get(get_lnum); size_t bytelen = STRLEN(s) + 1; // include EOL in bytelen old_count += bytelen; - if (!unique || i == 0 - || (sort_ic ? STRICMP(s, sortbuf1) : STRCMP(s, sortbuf1)) != 0) { + if (!unique || i == 0 || string_compare(s, sortbuf1) != 0) { // Copy the line into a buffer, it may become invalid in // ml_append(). And it's needed for "unique". STRCPY(sortbuf1, s); diff --git a/src/nvim/testdir/test_sort.vim b/src/nvim/testdir/test_sort.vim index 8287a761c6..278cbef01e 100644 --- a/src/nvim/testdir/test_sort.vim +++ b/src/nvim/testdir/test_sort.vim @@ -13,6 +13,25 @@ func Test_sort_strings() " numbers compared as strings call assert_equal([1, 2, 3], sort([3, 2, 1])) call assert_equal([13, 28, 3], sort([3, 28, 13])) + + call assert_equal(['A', 'O', 'P', 'a', 'o', 'p', 'Ä', 'Ô', 'ä', 'ô', 'œ', 'œ'], + \ sort(['A', 'O', 'P', 'a', 'o', 'p', 'Ä', 'Ô', 'ä', 'ô', 'œ', 'œ'])) + + call assert_equal(['A', 'a', 'o', 'O', 'p', 'P', 'Ä', 'Ô', 'ä', 'ô', 'œ', 'œ'], + \ sort(['A', 'a', 'o', 'O', 'œ', 'œ', 'p', 'P', 'Ä', 'ä', 'ô', 'Ô'], 'i')) + + let lc = execute('language collate') + " With the following locales, the accentuated letters are ordered + " similarly to the non-accentuated letters... + if lc =~? '"\(en\|es\|de\|fr\|it\|nl\).*\.utf-\?8"' + call assert_equal(['a', 'A', 'ä', 'Ä', 'o', 'O', 'ô', 'Ô', 'œ', 'œ', 'p', 'P'], + \ sort(['A', 'a', 'o', 'O', 'œ', 'œ', 'p', 'P', 'Ä', 'ä', 'ô', 'Ô'], 'l')) + " ... whereas with a Swedish locale, the accentuated letters are ordered + " after Z. + elseif lc =~? '"sv.*utf-\?8"' + call assert_equal(['a', 'A', 'o', 'O', 'p', 'P', 'ä', 'Ä', 'œ', 'œ', 'ô', 'Ô'], + \ sort(['A', 'a', 'o', 'O', 'œ', 'œ', 'p', 'P', 'Ä', 'ä', 'ô', 'Ô'], 'l')) + endif endfunc func Test_sort_numeric() @@ -1223,6 +1242,58 @@ func Test_sort_cmd() \ }, \ ] + " With the following locales, the accentuated letters are ordered + " similarly to the non-accentuated letters... + let lc = execute('language collate') + if lc =~? '"\(en\|es\|de\|fr\|it\|nl\).*\.utf-\?8"' + let tests += [ + \ { + \ 'name' : 'sort with locale', + \ 'cmd' : '%sort l', + \ 'input' : [ + \ 'A', + \ 'E', + \ 'O', + \ 'À', + \ 'È', + \ 'É', + \ 'Ô', + \ 'Œ', + \ 'Z', + \ 'a', + \ 'e', + \ 'o', + \ 'à', + \ 'è', + \ 'é', + \ 'ô', + \ 'œ', + \ 'z' + \ ], + \ 'expected' : [ + \ 'a', + \ 'A', + \ 'à', + \ 'À', + \ 'e', + \ 'E', + \ 'é', + \ 'É', + \ 'è', + \ 'È', + \ 'o', + \ 'O', + \ 'ô', + \ 'Ô', + \ 'œ', + \ 'Œ', + \ 'z', + \ 'Z' + \ ] + \ }, + \ ] + endif + for t in tests enew! call append(0, t.input) |