aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--runtime/doc/builtin.txt24
-rw-r--r--runtime/doc/options.txt4
-rw-r--r--runtime/doc/usr_41.txt1
-rw-r--r--src/nvim/eval.lua1
-rw-r--r--src/nvim/generators/gen_unicode_tables.lua6
-rw-r--r--src/nvim/mbyte.c166
-rw-r--r--src/nvim/mbyte.h1
-rw-r--r--src/nvim/testdir/test_utf8.vim35
8 files changed, 233 insertions, 5 deletions
diff --git a/runtime/doc/builtin.txt b/runtime/doc/builtin.txt
index c56ab70774..447f1c89e2 100644
--- a/runtime/doc/builtin.txt
+++ b/runtime/doc/builtin.txt
@@ -396,6 +396,7 @@ setbufline({expr}, {lnum}, {text})
Number set line {lnum} to {text} in buffer
{expr}
setbufvar({buf}, {varname}, {val}) set {varname} in buffer {buf} to {val}
+setcellwidths({list}) none set character cell width overrides
setcharpos({expr}, {list}) Number set the {expr} position to {list}
setcharsearch({dict}) Dict set character search from {dict}
setcmdpos({pos}) Number set cursor position in command-line
@@ -6817,6 +6818,29 @@ setbufvar({buf}, {varname}, {val}) *setbufvar()*
third argument: >
GetValue()->setbufvar(buf, varname)
+
+setcellwidths({list}) *setcellwidths()*
+ Specify overrides for cell widths of character ranges. This
+ tells Vim how wide characters are, counted in screen cells.
+ This overrides 'ambiwidth'. Example: >
+ call setcellwidths([[0x111, 0x111, 1],
+ \ [0x2194, 0x2199, 2]])
+
+< *E1109* *E1110* *E1111* *E1112* *E1113* *E1114*
+ The {list} argument is a List of Lists, each containing three
+ Numbers. These three numbers are [low, high, width]. "low"
+ and "high" can be the same, in which case this refers to one
+ character. Otherwise it is the range of characters from "low"
+ to "high" (inclusive). "width" is either 1 or 2, indicating
+ the character width in screen cells.
+ An error is given if the argument is invalid, also when a
+ range overlaps with another.
+ Only characters with value 0x100 and higher can be used.
+
+ To clear the overrides pass an empty list: >
+ call setcellwidths([])
+
+
setcharpos({expr}, {list}) *setcharpos()*
Same as |setpos()| but uses the specified column number as the
character index instead of the byte index in the line.
diff --git a/runtime/doc/options.txt b/runtime/doc/options.txt
index 9d03397821..28922e9c7f 100644
--- a/runtime/doc/options.txt
+++ b/runtime/doc/options.txt
@@ -591,7 +591,9 @@ A jump table for the options with a short description can be found at |Q_op|.
"double": Use twice the width of ASCII characters.
*E834* *E835*
The value "double" cannot be used if 'listchars' or 'fillchars'
contains a character that would be double width.
+
+ The values are overruled for characters specified with
+ |setcellwidths()|.
There are a number of CJK fonts for which the width of glyphs for
those characters are solely based on how many octets they take in
diff --git a/runtime/doc/usr_41.txt b/runtime/doc/usr_41.txt
index 008b9b4e58..bc2f7f077b 100644
--- a/runtime/doc/usr_41.txt
+++ b/runtime/doc/usr_41.txt
@@ -619,6 +619,7 @@ String manipulation: *string-functions*
strchars() length of a string in characters
strwidth() size of string when displayed
strdisplaywidth() size of string when displayed, deals with tabs
+ setcellwidths() set character cell width overrides
substitute() substitute a pattern match with a string
submatch() get a specific match in ":s" and substitute()
strpart() get part of a string using byte index
diff --git a/src/nvim/eval.lua b/src/nvim/eval.lua
index 6d8776d08b..a2272f0c98 100644
--- a/src/nvim/eval.lua
+++ b/src/nvim/eval.lua
@@ -327,6 +327,7 @@ return {
serverstop={args=1},
setbufline={args=3, base=3},
setbufvar={args=3, base=3},
+ setcellwidths={args=1, base=1},
setcharpos={args=2, base=2},
setcharsearch={args=1, base=1},
setcmdpos={args=1, base=1},
diff --git a/src/nvim/generators/gen_unicode_tables.lua b/src/nvim/generators/gen_unicode_tables.lua
index aa96c97bc1..36553f4649 100644
--- a/src/nvim/generators/gen_unicode_tables.lua
+++ b/src/nvim/generators/gen_unicode_tables.lua
@@ -12,8 +12,8 @@
-- 2 then interval applies only to first, third, fifth, … character in range.
-- Fourth value is number that should be added to the codepoint to yield
-- folded/lower/upper codepoint.
--- 4. emoji_width and emoji_all tables: sorted lists of non-overlapping closed
--- intervals of Emoji characters. emoji_width contains all the characters
+-- 4. emoji_wide and emoji_all tables: sorted lists of non-overlapping closed
+-- intervals of Emoji characters. emoji_wide contains all the characters
-- which don't have ambiguous or double width, and emoji_all has all Emojis.
if arg[1] == '--help' then
print('Usage:')
@@ -288,7 +288,7 @@ local build_emoji_table = function(ut_fp, emojiprops, doublewidth, ambiwidth)
end
ut_fp:write('};\n')
- ut_fp:write('static const struct interval emoji_width[] = {\n')
+ ut_fp:write('static const struct interval emoji_wide[] = {\n')
for _, p in ipairs(emojiwidth) do
ut_fp:write(make_range(p[1], p[2]))
end
diff --git a/src/nvim/mbyte.c b/src/nvim/mbyte.c
index 223b4d6845..66262ebfad 100644
--- a/src/nvim/mbyte.c
+++ b/src/nvim/mbyte.c
@@ -74,6 +74,19 @@ struct interval {
# include "unicode_tables.generated.h"
#endif
+static char e_list_item_nr_is_not_list[]  // setcellwidths(): entry %d of the argument is not a (non-NULL) List
+ = N_("E1109: List item %d is not a List");
+static char e_list_item_nr_does_not_contain_3_numbers[]  // entry %d is not exactly three Numbers
+ = N_("E1110: List item %d does not contain 3 numbers");
+static char e_list_item_nr_range_invalid[]  // entry %d: second number is below the first
+ = N_("E1111: List item %d range invalid");
+static char e_list_item_nr_cell_width_invalid[]  // entry %d: third number is not 1 or 2
+ = N_("E1112: List item %d cell width invalid");
+static char e_overlapping_ranges_for_nr[]  // %lx: range start that is not above the previous range's end
+ = N_("E1113: Overlapping ranges for %lx");
+static char e_only_values_of_0x100_and_higher_supported[]  // first number of an entry is below 0x100
+ = N_("E1114: Only values of 0x100 and higher supported");
+
// To speed up BYTELEN(); keep a lookup table to quickly get the length in
// bytes of a UTF-8 character from the first byte of a UTF-8 string. Bytes
// which are illegal when used as the first byte have a 1. The NUL byte has
@@ -472,13 +485,18 @@ static bool intable(const struct interval *table, size_t n_items, int c)
int utf_char2cells(int c)
{
if (c >= 0x100) {
+ int n = cw_value(c);
+ if (n != 0) {
+ return n;
+ }
+
if (!utf_printable(c)) {
return 6; // unprintable, displays <xxxx>
}
if (intable(doublewidth, ARRAY_SIZE(doublewidth), c)) {
return 2;
}
- if (p_emoji && intable(emoji_width, ARRAY_SIZE(emoji_width), c)) {
+ if (p_emoji && intable(emoji_wide, ARRAY_SIZE(emoji_wide), c)) {
return 2;
}
} else if (c >= 0x80 && !vim_isprintc(c)) {
@@ -2678,3 +2696,149 @@ char_u *string_convert_ext(const vimconv_T *const vcp, char_u *ptr, size_t *lenp
return retval;
}
+
+/// Entry of the table set by setcellwidths(): a range of characters and its cell width.
+typedef struct {
+ long first;  // lowest character of the range
+ long last;  // highest character of the range (inclusive)
+ char width;  // cell width for the range; setcellwidths() only accepts 1 or 2
+} cw_interval_T;
+
+static cw_interval_T *cw_table = NULL;  // NULL while no overrides are set
+static size_t cw_table_size = 0;  // number of entries in cw_table
+
+/// Look up the cell width override registered for the character `c`.
+///
+/// @param c  Character to look up.
+/// @return The override (1 or 2) when `c` lies inside a range of the
+///         cellwidth table, 0 when there is no override for it.
+static int cw_value(int c)
+{
+  // No table at all, or `c` lies below the first range (the common case for
+  // Latin1 etc. characters): nothing to look up.
+  if (cw_table == NULL || c < cw_table[0].first) {
+    return 0;
+  }
+
+  // Binary search; relies on the table being ordered by range start.
+  int lo = 0;
+  int hi = (int)cw_table_size - 1;
+  while (lo <= hi) {
+    const int probe = (lo + hi) / 2;
+    const cw_interval_T *const entry = &cw_table[probe];
+    if (entry->last < c) {
+      lo = probe + 1;
+    } else if (entry->first > c) {
+      hi = probe - 1;
+    } else {
+      return entry->width;
+    }
+  }
+  return 0;
+}
+
+/// qsort() callback that orders the inner lists passed to setcellwidths() by
+/// their first number (the start of the range).
+///
+/// qsort() hands over pointers to the array elements.  The elements are
+/// `const list_T *`, so each argument is dereferenced once before use.
+/// The numbers are compared instead of subtracted, because the difference of
+/// two varnumber_T values can overflow the int return value.
+///
+/// @param a1  Pointer to the first `const list_T *` element.
+/// @param a2  Pointer to the second `const list_T *` element.
+/// @return -1, 0 or 1 when the first range starts below, at or above the
+///         start of the second range.
+static int tv_nr_compare(const void *a1, const void *a2)
+{
+  const listitem_T *const li1 = tv_list_first(*(const list_T *const *)a1);
+  const listitem_T *const li2 = tv_list_first(*(const list_T *const *)a2);
+  const varnumber_T n1 = TV_LIST_ITEM_TV(li1)->vval.v_number;
+  const varnumber_T n2 = TV_LIST_ITEM_TV(li2)->vval.v_number;
+
+  return n1 == n2 ? 0 : (n1 > n2 ? 1 : -1);
+}
+
+/// "setcellwidths()" function
+///
+/// Replaces the cell width override table with the ranges given in
+/// argvars[0], a list of [low, high, width] lists.  An empty list clears the
+/// table.  When the argument is invalid an error message is given and the
+/// current table is left unchanged.
+///
+/// @param argvars  argvars[0] is the list of ranges.
+/// @param rettv    Not used.
+/// @param fptr     Not used.
+void f_setcellwidths(typval_T *argvars, typval_T *rettv, FunPtr fptr)
+{
+  if (argvars[0].v_type != VAR_LIST || argvars[0].vval.v_list == NULL) {
+    emsg(_(e_listreq));
+    return;
+  }
+  const list_T *const l = argvars[0].vval.v_list;
+  if (tv_list_len(l) == 0) {
+    // Clearing the table.
+    xfree(cw_table);
+    cw_table = NULL;
+    cw_table_size = 0;
+    return;
+  }
+
+  const size_t count = (size_t)tv_list_len(l);
+
+  // Remember every inner list, so that they can be sorted on the start of
+  // their range before the table is filled.
+  const list_T **ptrs = xmalloc(sizeof(const list_T *) * count);
+
+  // Check that all entries are a list with three numbers, the range is
+  // valid and the cell width is valid.
+  int item = 0;
+  TV_LIST_ITER_CONST(l, li, {
+    const typval_T *const li_tv = TV_LIST_ITEM_TV(li);
+
+    if (li_tv->v_type != VAR_LIST || li_tv->vval.v_list == NULL) {
+      semsg(_(e_list_item_nr_is_not_list), item);
+      xfree(ptrs);
+      return;
+    }
+
+    const list_T *const li_l = li_tv->vval.v_list;
+    const listitem_T *lili = tv_list_first(li_l);
+    int i = 0;
+    varnumber_T n1 = 0;
+    for (; lili != NULL; lili = TV_LIST_ITEM_NEXT(li_l, lili), i++) {
+      const typval_T *const lili_tv = TV_LIST_ITEM_TV(lili);
+      if (lili_tv->v_type != VAR_NUMBER) {
+        break;
+      }
+      if (i == 0) {
+        n1 = lili_tv->vval.v_number;
+        if (n1 < 0x100) {
+          emsg(_(e_only_values_of_0x100_and_higher_supported));
+          xfree(ptrs);
+          return;
+        }
+      } else if (i == 1 && lili_tv->vval.v_number < n1) {
+        semsg(_(e_list_item_nr_range_invalid), item);
+        xfree(ptrs);
+        return;
+      } else if (i == 2 && (lili_tv->vval.v_number < 1 || lili_tv->vval.v_number > 2)) {
+        semsg(_(e_list_item_nr_cell_width_invalid), item);
+        xfree(ptrs);
+        return;
+      }
+    }
+
+    if (i != 3) {
+      semsg(_(e_list_item_nr_does_not_contain_3_numbers), item);
+      xfree(ptrs);
+      return;
+    }
+
+    // Store the inner list itself: `lili` is always NULL at this point.
+    ptrs[item++] = li_l;
+  });
+
+  // Sort the inner lists on the first number.
+  qsort((void *)ptrs, count, sizeof(const list_T *), tv_nr_compare);
+
+  cw_interval_T *table = xmalloc(sizeof(cw_interval_T) * count);
+
+  // Store the items in the new table, in sorted order.  Because of the
+  // ordering an overlap can only occur between neighbouring entries.
+  for (size_t idx = 0; idx < count; idx++) {
+    const list_T *const li_l = ptrs[idx];
+    const listitem_T *lili = tv_list_first(li_l);
+    const varnumber_T n1 = TV_LIST_ITEM_TV(lili)->vval.v_number;
+    if (idx > 0 && n1 <= table[idx - 1].last) {
+      semsg(_(e_overlapping_ranges_for_nr), (long)n1);
+      xfree(ptrs);
+      xfree(table);
+      return;
+    }
+    table[idx].first = n1;
+    lili = TV_LIST_ITEM_NEXT(li_l, lili);
+    table[idx].last = TV_LIST_ITEM_TV(lili)->vval.v_number;
+    lili = TV_LIST_ITEM_NEXT(li_l, lili);
+    table[idx].width = (char)TV_LIST_ITEM_TV(lili)->vval.v_number;
+  }
+
+  xfree(ptrs);
+  xfree(cw_table);
+  cw_table = table;
+  cw_table_size = count;
+}
diff --git a/src/nvim/mbyte.h b/src/nvim/mbyte.h
index ffa8411675..2a9afcbd03 100644
--- a/src/nvim/mbyte.h
+++ b/src/nvim/mbyte.h
@@ -5,6 +5,7 @@
#include <stdint.h>
#include <string.h>
+#include "nvim/eval/typval.h"
#include "nvim/func_attr.h"
#include "nvim/mbyte_defs.h"
#include "nvim/os/os_defs.h" // For indirect
diff --git a/src/nvim/testdir/test_utf8.vim b/src/nvim/testdir/test_utf8.vim
index 9b010a5dbc..c5dfd85e5e 100644
--- a/src/nvim/testdir/test_utf8.vim
+++ b/src/nvim/testdir/test_utf8.vim
@@ -140,6 +140,41 @@ func Test_list2str_str2list_latin1()
call assert_equal(s, sres)
endfunc
+" Test setcellwidths(): applying overrides, clearing them and argument checks.
+func Test_setcellwidths()
+  call setcellwidths([
+        \ [0x1330, 0x1330, 2],
+        \ [0x1337, 0x1339, 2],
+        \ [9999, 10000, 1],
+        \])
+
+  " Characters inside a range use the override, others keep their width.
+  call assert_equal(2, strwidth("\u1330"))
+  call assert_equal(1, strwidth("\u1336"))
+  call assert_equal(2, strwidth("\u1337"))
+  call assert_equal(2, strwidth("\u1339"))
+  call assert_equal(1, strwidth("\u133a"))
+
+  " An empty list removes all overrides again.
+  call setcellwidths([])
+  call assert_equal(1, strwidth("\u1330"))
+
+  " The argument must be a List.
+  call assert_fails('call setcellwidths(1)', 'E714:')
+
+  " Every item must be a List.
+  call assert_fails('call setcellwidths([1, 2, 0])', 'E1109:')
+
+  " Every item must contain exactly three Numbers.
+  call assert_fails('call setcellwidths([[0x101]])', 'E1110:')
+  call assert_fails('call setcellwidths([[0x101, 0x102]])', 'E1110:')
+  call assert_fails('call setcellwidths([[0x101, 0x102, 1, 4]])', 'E1110:')
+  call assert_fails('call setcellwidths([["a"]])', 'E1110:')
+
+  " "high" must not be below "low".
+  call assert_fails('call setcellwidths([[0x102, 0x101, 1]])', 'E1111:')
+
+  " The width must be 1 or 2.
+  call assert_fails('call setcellwidths([[0x101, 0x102, 0]])', 'E1112:')
+  call assert_fails('call setcellwidths([[0x101, 0x102, 3]])', 'E1112:')
+
+  " Ranges must not overlap, not even in a single character.
+  call assert_fails('call setcellwidths([[0x111, 0x122, 1], [0x115, 0x116, 2]])', 'E1113:')
+  call assert_fails('call setcellwidths([[0x111, 0x122, 1], [0x122, 0x123, 2]])', 'E1113:')
+
+  " Characters below 0x100 cannot be overridden.
+  call assert_fails('call setcellwidths([[0x33, 0x44, 2]])', 'E1114:')
+endfunc
+
func Test_print_overlong()
" Text with more composing characters than MB_MAXBYTES.
new