aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/nvim/eval.lua | 1
-rw-r--r-- src/nvim/generators/gen_unicode_tables.lua | 6
-rw-r--r-- src/nvim/mbyte.c | 166
-rw-r--r-- src/nvim/mbyte.h | 1
-rw-r--r-- src/nvim/testdir/test_utf8.vim | 35
5 files changed, 205 insertions, 4 deletions
diff --git a/src/nvim/eval.lua b/src/nvim/eval.lua
index 6d8776d08b..a2272f0c98 100644
--- a/src/nvim/eval.lua
+++ b/src/nvim/eval.lua
@@ -327,6 +327,7 @@ return {
serverstop={args=1},
setbufline={args=3, base=3},
setbufvar={args=3, base=3},
+ setcellwidths={args=1, base=1},
setcharpos={args=2, base=2},
setcharsearch={args=1, base=1},
setcmdpos={args=1, base=1},
diff --git a/src/nvim/generators/gen_unicode_tables.lua b/src/nvim/generators/gen_unicode_tables.lua
index aa96c97bc1..36553f4649 100644
--- a/src/nvim/generators/gen_unicode_tables.lua
+++ b/src/nvim/generators/gen_unicode_tables.lua
@@ -12,8 +12,8 @@
-- 2 then interval applies only to first, third, fifth, … character in range.
-- Fourth value is number that should be added to the codepoint to yield
-- folded/lower/upper codepoint.
--- 4. emoji_width and emoji_all tables: sorted lists of non-overlapping closed
--- intervals of Emoji characters. emoji_width contains all the characters
+-- 4. emoji_wide and emoji_all tables: sorted lists of non-overlapping closed
+-- intervals of Emoji characters. emoji_wide contains all the characters
-- which don't have ambiguous or double width, and emoji_all has all Emojis.
if arg[1] == '--help' then
print('Usage:')
@@ -288,7 +288,7 @@ local build_emoji_table = function(ut_fp, emojiprops, doublewidth, ambiwidth)
end
ut_fp:write('};\n')
- ut_fp:write('static const struct interval emoji_width[] = {\n')
+ ut_fp:write('static const struct interval emoji_wide[] = {\n')
for _, p in ipairs(emojiwidth) do
ut_fp:write(make_range(p[1], p[2]))
end
diff --git a/src/nvim/mbyte.c b/src/nvim/mbyte.c
index 223b4d6845..66262ebfad 100644
--- a/src/nvim/mbyte.c
+++ b/src/nvim/mbyte.c
@@ -74,6 +74,19 @@ struct interval {
# include "unicode_tables.generated.h"
#endif
+// Error messages reported by f_setcellwidths().  They are only ever read
+// (passed through _() to emsg()/semsg()), so they are declared const.
+static const char e_list_item_nr_is_not_list[]
+  = N_("E1109: List item %d is not a List");
+static const char e_list_item_nr_does_not_contain_3_numbers[]
+  = N_("E1110: List item %d does not contain 3 numbers");
+static const char e_list_item_nr_range_invalid[]
+  = N_("E1111: List item %d range invalid");
+static const char e_list_item_nr_cell_width_invalid[]
+  = N_("E1112: List item %d cell width invalid");
+static const char e_overlapping_ranges_for_nr[]
+  = N_("E1113: Overlapping ranges for %lx");
+static const char e_only_values_of_0x100_and_higher_supported[]
+  = N_("E1114: Only values of 0x100 and higher supported");
+
// To speed up BYTELEN(); keep a lookup table to quickly get the length in
// bytes of a UTF-8 character from the first byte of a UTF-8 string. Bytes
// which are illegal when used as the first byte have a 1. The NUL byte has
@@ -472,13 +485,18 @@ static bool intable(const struct interval *table, size_t n_items, int c)
int utf_char2cells(int c)
{
if (c >= 0x100) {
+ int n = cw_value(c);
+ if (n != 0) {
+ return n;
+ }
+
if (!utf_printable(c)) {
return 6; // unprintable, displays <xxxx>
}
if (intable(doublewidth, ARRAY_SIZE(doublewidth), c)) {
return 2;
}
- if (p_emoji && intable(emoji_width, ARRAY_SIZE(emoji_width), c)) {
+ if (p_emoji && intable(emoji_wide, ARRAY_SIZE(emoji_wide), c)) {
return 2;
}
} else if (c >= 0x80 && !vim_isprintc(c)) {
@@ -2678,3 +2696,149 @@ char_u *string_convert_ext(const vimconv_T *const vcp, char_u *ptr, size_t *lenp
return retval;
}
+
+/// One entry of the table set by setcellwidths(): a closed range of characters and its cell width.
+typedef struct {
+ long first;  ///< First character of the range (inclusive).
+ long last;  ///< Last character of the range (inclusive).
+ char width;  ///< Value returned by cw_value() for characters in the range.
+} cw_interval_T;
+
+static cw_interval_T *cw_table = NULL;  ///< Table installed by f_setcellwidths(); NULL when no table is set.
+static size_t cw_table_size = 0;  ///< Number of entries in cw_table.
+
+/// Look up the cell width stored for character `c` in the table installed by
+/// setcellwidths().
+///
+/// The lookup is a binary search, so it assumes the table entries are ordered
+/// by range and do not overlap.
+///
+/// @param c  The character to look up.
+/// @return the width stored for the range containing `c` (1 or 2), or 0 when
+///         no table is installed or no range contains `c`.
+static int cw_value(int c)
+{
+  // Nothing to search when there is no table, or when `c` lies below the
+  // start of the first range (the common case for Latin1 etc. characters).
+  if (cw_table == NULL || c < cw_table[0].first) {
+    return 0;
+  }
+
+  int lo = 0;
+  int hi = (int)cw_table_size - 1;
+  while (lo <= hi) {
+    const int mid = (lo + hi) / 2;
+    const cw_interval_T *const entry = &cw_table[mid];
+
+    if (c > entry->last) {
+      // `c` is beyond this range: continue in the upper half.
+      lo = mid + 1;
+    } else if (c < entry->first) {
+      // `c` is before this range: continue in the lower half.
+      hi = mid - 1;
+    } else {
+      return entry->width;
+    }
+  }
+
+  return 0;
+}
+
+/// qsort() comparator that orders setcellwidths() entries by their first
+/// number.
+///
+/// @param a1  Pointer to an array element; each element is a `const list_T *`
+///            whose first item has already been validated to be a Number.
+/// @param a2  Same as `a1`, for the other element.
+/// @return -1, 0 or 1 when the first number of `a1` is less than, equal to or
+///         greater than the first number of `a2`.
+static int tv_nr_compare(const void *a1, const void *a2)
+{
+  // qsort() hands over pointers to the array elements, not the elements
+  // themselves, so one dereference is needed to reach the stored list.
+  const list_T *const l1 = *(const list_T *const *)a1;
+  const list_T *const l2 = *(const list_T *const *)a2;
+  const varnumber_T n1 = TV_LIST_ITEM_TV(tv_list_first(l1))->vval.v_number;
+  const varnumber_T n2 = TV_LIST_ITEM_TV(tv_list_first(l2))->vval.v_number;
+
+  // Compare rather than subtract: the difference of two varnumber_T values
+  // does not necessarily fit in an int.
+  return n1 == n2 ? 0 : (n1 > n2 ? 1 : -1);
+}
+
+/// "setcellwidths()" function
+///
+/// Replaces the table consulted by cw_value().  The argument must be a List
+/// whose items are Lists of exactly three Numbers: [first, last, width] with
+/// first >= 0x100, last >= first and width 1 or 2.  Ranges may be given in any
+/// order but must not overlap.  An empty List clears the table.
+///
+/// On any validation error a message is given and the current table is left
+/// untouched.  The item number in E1109..E1112 is the zero-based index of the
+/// offending entry in the argument.
+///
+/// @param argvars  Function arguments; only argvars[0] is used.
+/// @param rettv    Not modified by this function.
+/// @param fptr     Unused.
+void f_setcellwidths(typval_T *argvars, typval_T *rettv, FunPtr fptr)
+{
+  if (argvars[0].v_type != VAR_LIST || argvars[0].vval.v_list == NULL) {
+    emsg(_(e_listreq));
+    return;
+  }
+  const list_T *const l = argvars[0].vval.v_list;
+  const int len = tv_list_len(l);
+  if (len == 0) {
+    // Clearing the table.
+    xfree(cw_table);
+    cw_table = NULL;
+    cw_table_size = 0;
+    return;
+  }
+
+  // Remember the sub-lists (not their items): walking a sub-list later with
+  // TV_LIST_ITEM_NEXT() needs the list it belongs to.
+  const list_T **ptrs = xmalloc(sizeof(*ptrs) * (size_t)len);
+
+  // Check that all entries are a list with three numbers, the range is
+  // valid and the cell width is valid.
+  int item = 0;
+  TV_LIST_ITER_CONST(l, li, {
+    const typval_T *const li_tv = TV_LIST_ITEM_TV(li);
+
+    if (li_tv->v_type != VAR_LIST || li_tv->vval.v_list == NULL) {
+      semsg(_(e_list_item_nr_is_not_list), item);
+      xfree(ptrs);
+      return;
+    }
+
+    const list_T *const li_l = li_tv->vval.v_list;
+    const listitem_T *lili = tv_list_first(li_l);
+    int i = 0;
+    varnumber_T n1 = 0;
+    for (; lili != NULL; lili = TV_LIST_ITEM_NEXT(li_l, lili), i++) {
+      const typval_T *const lili_tv = TV_LIST_ITEM_TV(lili);
+      if (lili_tv->v_type != VAR_NUMBER) {
+        break;
+      }
+      if (i == 0) {
+        n1 = lili_tv->vval.v_number;
+        if (n1 < 0x100) {
+          emsg(_(e_only_values_of_0x100_and_higher_supported));
+          xfree(ptrs);
+          return;
+        }
+      } else if (i == 1 && lili_tv->vval.v_number < n1) {
+        semsg(_(e_list_item_nr_range_invalid), item);
+        xfree(ptrs);
+        return;
+      } else if (i == 2 && (lili_tv->vval.v_number < 1 || lili_tv->vval.v_number > 2)) {
+        semsg(_(e_list_item_nr_cell_width_invalid), item);
+        xfree(ptrs);
+        return;
+      }
+    }
+
+    // Exactly three Numbers are required.  `lili` is still non-NULL when the
+    // loop stopped early on a non-Number, which also covers the case of three
+    // Numbers followed by something else.
+    if (i != 3 || lili != NULL) {
+      semsg(_(e_list_item_nr_does_not_contain_3_numbers), item);
+      xfree(ptrs);
+      return;
+    }
+
+    ptrs[item++] = li_l;
+  });
+
+  // Sort the list on the first number.
+  qsort((void *)ptrs, (size_t)len, sizeof(*ptrs), tv_nr_compare);
+
+  cw_interval_T *table = xmalloc(sizeof(*table) * (size_t)len);
+
+  // Store the items in the new table, in sorted order so that overlapping
+  // ranges end up adjacent and cw_value() can use a binary search.
+  for (item = 0; item < len; item++) {
+    const list_T *const li_l = ptrs[item];
+    const listitem_T *lili = tv_list_first(li_l);
+    const varnumber_T n1 = TV_LIST_ITEM_TV(lili)->vval.v_number;
+    if (item > 0 && n1 <= table[item - 1].last) {
+      semsg(_(e_overlapping_ranges_for_nr), (long)n1);
+      xfree(ptrs);
+      xfree(table);
+      return;
+    }
+    table[item].first = n1;
+    lili = TV_LIST_ITEM_NEXT(li_l, lili);
+    table[item].last = TV_LIST_ITEM_TV(lili)->vval.v_number;
+    lili = TV_LIST_ITEM_NEXT(li_l, lili);
+    table[item].width = (char)TV_LIST_ITEM_TV(lili)->vval.v_number;
+  }
+
+  xfree(ptrs);
+  xfree(cw_table);
+  cw_table = table;
+  cw_table_size = (size_t)len;
+}
diff --git a/src/nvim/mbyte.h b/src/nvim/mbyte.h
index ffa8411675..2a9afcbd03 100644
--- a/src/nvim/mbyte.h
+++ b/src/nvim/mbyte.h
@@ -5,6 +5,7 @@
#include <stdint.h>
#include <string.h>
+#include "nvim/eval/typval.h"
#include "nvim/func_attr.h"
#include "nvim/mbyte_defs.h"
#include "nvim/os/os_defs.h" // For indirect
diff --git a/src/nvim/testdir/test_utf8.vim b/src/nvim/testdir/test_utf8.vim
index 9b010a5dbc..c5dfd85e5e 100644
--- a/src/nvim/testdir/test_utf8.vim
+++ b/src/nvim/testdir/test_utf8.vim
@@ -140,6 +140,41 @@ func Test_list2str_str2list_latin1()
call assert_equal(s, sres)
endfunc
+" Check that setcellwidths() overrides the width reported by strwidth() and
+" that each kind of invalid argument is rejected with its own error.
+func Test_setcellwidths()
+ call setcellwidths([
+ \ [0x1330, 0x1330, 2],
+ \ [0x1337, 0x1339, 2],
+ \ [9999, 10000, 1],
+ \])
+
+ " Characters inside a range report the configured width; 0x1336 and 0x133a
+ " lie just outside the ranges and are expected to report width 1.
+ call assert_equal(2, strwidth("\u1330"))
+ call assert_equal(1, strwidth("\u1336"))
+ call assert_equal(2, strwidth("\u1337"))
+ call assert_equal(2, strwidth("\u1339"))
+ call assert_equal(1, strwidth("\u133a"))
+
+ " An empty list is accepted (it clears the table).
+ call setcellwidths([])
+
+ " The argument must be a List.
+ call assert_fails('call setcellwidths(1)', 'E714:')
+
+ " Every item must itself be a List.
+ call assert_fails('call setcellwidths([1, 2, 0])', 'E1109:')
+
+ " Every item must hold exactly three Numbers.
+ call assert_fails('call setcellwidths([[0x101]])', 'E1110:')
+ call assert_fails('call setcellwidths([[0x101, 0x102]])', 'E1110:')
+ call assert_fails('call setcellwidths([[0x101, 0x102, 1, 4]])', 'E1110:')
+ call assert_fails('call setcellwidths([["a"]])', 'E1110:')
+
+ " The end of a range must not be below its start.
+ call assert_fails('call setcellwidths([[0x102, 0x101, 1]])', 'E1111:')
+
+ " The width must be 1 or 2.
+ call assert_fails('call setcellwidths([[0x101, 0x102, 0]])', 'E1112:')
+ call assert_fails('call setcellwidths([[0x101, 0x102, 3]])', 'E1112:')
+
+ " Ranges must not overlap, not even in a single shared character.
+ call assert_fails('call setcellwidths([[0x111, 0x122, 1], [0x115, 0x116, 2]])', 'E1113:')
+ call assert_fails('call setcellwidths([[0x111, 0x122, 1], [0x122, 0x123, 2]])', 'E1113:')
+
+ " Characters below 0x100 cannot be overridden.
+ call assert_fails('call setcellwidths([[0x33, 0x44, 2]])', 'E1114:')
+endfunc
+
func Test_print_overlong()
" Text with more composing characters than MB_MAXBYTES.
new