8 files changed, 233 insertions, 5 deletions
diff --git a/runtime/doc/builtin.txt b/runtime/doc/builtin.txt
index c56ab70774..447f1c89e2 100644
--- a/runtime/doc/builtin.txt
+++ b/runtime/doc/builtin.txt
@@ -396,6 +396,7 @@ setbufline({expr}, {lnum}, {text})
 				Number	set line {lnum} to {text} in buffer
 					{expr}
 setbufvar({buf}, {varname}, {val})	set {varname} in buffer {buf} to {val}
+setcellwidths({list})		none	set character cell width overrides
 setcharpos({expr}, {list})	Number	set the {expr} position to {list}
 setcharsearch({dict})		Dict	set character search from {dict}
 setcmdpos({pos})		Number	set cursor position in command-line
@@ -6817,6 +6818,29 @@ setbufvar({buf}, {varname}, {val})			*setbufvar()*
 		third argument: >
 			GetValue()->setbufvar(buf, varname)
 
+
+setcellwidths({list})					*setcellwidths()*
+		Specify overrides for cell widths of character ranges.  This
+		tells Vim how wide characters are, counted in screen cells.
+		This overrides 'ambiwidth'.  Example: >
+		   setcellwidths([[0x111, 0x111, 1],
+		   		\ [0x2194, 0x2199, 2]])
+
+<				*E1109* *E1110* *E1111* *E1112* *E1113* *E1114*
+		The {list} argument is a List of Lists, each holding three
+		numbers. These three numbers are [low, high, width].  "low"
+		and "high" can be the same, in which case this refers to one
+		character. Otherwise it is the range of characters from "low"
+		to "high" (inclusive).  "width" is either 1 or 2, indicating
+		the character width in screen cells.
+		An error is given if the argument is invalid, also when a
+		range overlaps with another.
+		Only characters with value 0x100 and higher can be used.
+
+		To clear the overrides pass an empty list: >
+		   setcellwidths([])
+
+
 setcharpos({expr}, {list})				*setcharpos()*
 		Same as |setpos()| but uses the specified column number as the
 		character index instead of the byte index in the line.
diff --git a/runtime/doc/options.txt b/runtime/doc/options.txt
index 9d03397821..28922e9c7f 100644
--- a/runtime/doc/options.txt
+++ b/runtime/doc/options.txt
@@ -591,7 +591,10 @@ A jump table for the options with a short description can be found at |Q_op|.
 	"double":	Use twice the width of ASCII characters.
 							*E834* *E835*
 	The value "double" cannot be used if 'listchars' or 'fillchars'
 	contains a character that would be double width.
+
+	The values are overruled for characters specified with
+	|setcellwidths()|.
 
 	There are a number of CJK fonts for which the width of glyphs for
 	those characters are solely based on how many octets they take in
diff --git a/runtime/doc/usr_41.txt b/runtime/doc/usr_41.txt
index 008b9b4e58..bc2f7f077b 100644
--- a/runtime/doc/usr_41.txt
+++ b/runtime/doc/usr_41.txt
@@ -619,6 +619,7 @@ String manipulation:					*string-functions*
 	strchars()		length of a string in characters
 	strwidth()		size of string when displayed
 	strdisplaywidth()	size of string when displayed, deals with tabs
+	setcellwidths()		set character cell width overrides
 	substitute()		substitute a pattern match with a string
 	submatch()		get a specific match in ":s" and substitute()
 	strpart()		get part of a string using byte index
diff --git a/src/nvim/eval.lua b/src/nvim/eval.lua
index 6d8776d08b..a2272f0c98 100644
--- a/src/nvim/eval.lua
+++ b/src/nvim/eval.lua
@@ -327,6 +327,7 @@ return {
     serverstop={args=1},
     setbufline={args=3, base=3},
     setbufvar={args=3, base=3},
+    setcellwidths={args=1, base=1},
     setcharpos={args=2, base=2},
     setcharsearch={args=1, base=1},
     setcmdpos={args=1, base=1},
diff --git a/src/nvim/generators/gen_unicode_tables.lua b/src/nvim/generators/gen_unicode_tables.lua
index aa96c97bc1..36553f4649 100644
--- a/src/nvim/generators/gen_unicode_tables.lua
+++ b/src/nvim/generators/gen_unicode_tables.lua
@@ -12,8 +12,8 @@
 --    2 then interval applies only to first, third, fifth, … character in range.
 --    Fourth value is number that should be added to the codepoint to yield
 --    folded/lower/upper codepoint.
--- 4. emoji_width and emoji_all tables: sorted lists of non-overlapping closed
---    intervals of Emoji characters.  emoji_width contains all the characters
+-- 4. emoji_wide and emoji_all tables: sorted lists of non-overlapping closed
+--    intervals of Emoji characters.  emoji_wide contains all the characters
 --    which don't have ambiguous or double width, and emoji_all has all Emojis.
 if arg[1] == '--help' then
   print('Usage:')
@@ -288,7 +288,7 @@ local build_emoji_table = function(ut_fp, emojiprops, doublewidth, ambiwidth)
   end
   ut_fp:write('};\n')
 
-  ut_fp:write('static const struct interval emoji_width[] = {\n')
+  ut_fp:write('static const struct interval emoji_wide[] = {\n')
   for _, p in ipairs(emojiwidth) do
     ut_fp:write(make_range(p[1], p[2]))
   end
diff --git a/src/nvim/mbyte.c b/src/nvim/mbyte.c
index 223b4d6845..66262ebfad 100644
--- a/src/nvim/mbyte.c
+++ b/src/nvim/mbyte.c
@@ -74,6 +74,19 @@ struct interval {
 # include "unicode_tables.generated.h"
 #endif
 
+static char e_list_item_nr_is_not_list[]
+  = N_("E1109: List item %d is not a List");
+static char e_list_item_nr_does_not_contain_3_numbers[]
+  = N_("E1110: List item %d does not contain 3 numbers");
+static char e_list_item_nr_range_invalid[]
+  = N_("E1111: List item %d range invalid");
+static char e_list_item_nr_cell_width_invalid[]
+  = N_("E1112: List item %d cell width invalid");
+static char e_overlapping_ranges_for_nr[]
+  = N_("E1113: Overlapping ranges for %lx");
+static char e_only_values_of_0x100_and_higher_supported[]
+  = N_("E1114: Only values of 0x100 and higher supported");
+
 // To speed up BYTELEN(); keep a lookup table to quickly get the length in
 // bytes of a UTF-8 character from the first byte of a UTF-8 string.  Bytes
 // which are illegal when used as the first byte have a 1.  The NUL byte has
@@ -472,13 +485,18 @@ static bool intable(const struct interval *table, size_t n_items, int c)
 int utf_char2cells(int c)
 {
   if (c >= 0x100) {
+    int n = cw_value(c);
+    if (n != 0) {
+      return n;
+    }
+
     if (!utf_printable(c)) {
       return 6;                 // unprintable, displays <xxxx>
     }
     if (intable(doublewidth, ARRAY_SIZE(doublewidth), c)) {
       return 2;
     }
-    if (p_emoji && intable(emoji_width, ARRAY_SIZE(emoji_width), c)) {
+    if (p_emoji && intable(emoji_wide, ARRAY_SIZE(emoji_wide), c)) {
       return 2;
     }
   } else if (c >= 0x80 && !vim_isprintc(c)) {
@@ -2678,3 +2696,173 @@ char_u *string_convert_ext(const vimconv_T *const vcp, char_u *ptr, size_t *lenp
 
   return retval;
 }
+
+/// One entry of the cell width override table set by setcellwidths():
+/// characters in the closed range [first, last] occupy `width` screen cells.
+typedef struct {
+  long first;
+  long last;
+  char width;
+} cw_interval_T;
+
+/// Override table, sorted on `first` with non-overlapping ranges; NULL when no
+/// overrides are set.
+static cw_interval_T *cw_table = NULL;
+static size_t cw_table_size = 0;
+
+/// Return the value of the cellwidth table for the character `c`.
+///
+/// @param c The source character.
+/// @return 1 or 2 when `c` is in the cellwidth table, 0 if not.
+static int cw_value(int c)
+{
+  if (cw_table == NULL) {
+    return 0;
+  }
+
+  // first quick check for Latin1 etc. characters
+  if (c < cw_table[0].first) {
+    return 0;
+  }
+
+  // binary search in table; relies on cw_table being sorted on `first`
+  int bot = 0;
+  int top = (int)cw_table_size - 1;
+  while (top >= bot) {
+    int mid = (bot + top) / 2;
+    if (cw_table[mid].last < c) {
+      bot = mid + 1;
+    } else if (cw_table[mid].first > c) {
+      top = mid - 1;
+    } else {
+      return cw_table[mid].width;
+    }
+  }
+  return 0;
+}
+
+/// qsort() callback for f_setcellwidths(): order two [low, high, width] lists
+/// on their "low" value.
+///
+/// @param a1 Pointer to an array element, i.e. to a `const list_T *`.
+/// @param a2 Likewise, for the second element.
+/// @return negative, zero or positive, as qsort() expects.
+static int tv_nr_compare(const void *a1, const void *a2)
+{
+  // qsort() passes the address of each array element, so dereference once to
+  // get at the stored list pointer.
+  const list_T *const l1 = *(const list_T *const *)a1;
+  const list_T *const l2 = *(const list_T *const *)a2;
+  const varnumber_T n1 = TV_LIST_ITEM_TV(tv_list_first(l1))->vval.v_number;
+  const varnumber_T n2 = TV_LIST_ITEM_TV(tv_list_first(l2))->vval.v_number;
+
+  // Compare instead of subtracting: the difference of two varnumber_T values
+  // may not fit in an int and could then yield the wrong sign.
+  return n1 == n2 ? 0 : (n1 > n2 ? 1 : -1);
+}
+
+/// "setcellwidths()" function
+///
+/// Replaces the cell width override table with the ranges from the List in
+/// argvars[0]; an empty List clears the table.  Every item must be a List of
+/// three Numbers [low, high, width] with 0x100 <= low <= high and width 1 or 2.
+/// The items may be given in any order but their ranges must not overlap.  On
+/// any error a message is given and the current table is left unchanged.
+void f_setcellwidths(typval_T *argvars, typval_T *rettv, FunPtr fptr)
+{
+  if (argvars[0].v_type != VAR_LIST || argvars[0].vval.v_list == NULL) {
+    emsg(_(e_listreq));
+    return;
+  }
+  const list_T *const l = argvars[0].vval.v_list;
+  if (tv_list_len(l) == 0) {
+    // Clearing the table.
+    xfree(cw_table);
+    cw_table = NULL;
+    cw_table_size = 0;
+    return;
+  }
+
+  const size_t n_items = (size_t)tv_list_len(l);
+  // The validated inner lists, to be sorted on their first number.
+  const list_T **ptrs = xmalloc(sizeof(const list_T *) * n_items);
+
+  // Check that all entries are a list with three numbers, the range is
+  // valid and the cell width is valid.
+  int item = 0;
+  TV_LIST_ITER_CONST(l, li, {
+    const typval_T *const li_tv = TV_LIST_ITEM_TV(li);
+
+    if (li_tv->v_type != VAR_LIST || li_tv->vval.v_list == NULL) {
+      semsg(_(e_list_item_nr_is_not_list), item);
+      xfree(ptrs);
+      return;
+    }
+
+    const list_T *const li_l = li_tv->vval.v_list;
+    const listitem_T *lili = tv_list_first(li_l);
+    int i = 0;
+    varnumber_T n1 = 0;
+    for (; lili != NULL; lili = TV_LIST_ITEM_NEXT(li_l, lili), i++) {
+      const typval_T *const lili_tv = TV_LIST_ITEM_TV(lili);
+      if (lili_tv->v_type != VAR_NUMBER) {
+        break;
+      }
+      if (i == 0) {
+        n1 = lili_tv->vval.v_number;
+        if (n1 < 0x100) {
+          emsg(_(e_only_values_of_0x100_and_higher_supported));
+          xfree(ptrs);
+          return;
+        }
+      } else if (i == 1 && lili_tv->vval.v_number < n1) {
+        semsg(_(e_list_item_nr_range_invalid), item);
+        xfree(ptrs);
+        return;
+      } else if (i == 2 && (lili_tv->vval.v_number < 1 || lili_tv->vval.v_number > 2)) {
+        semsg(_(e_list_item_nr_cell_width_invalid), item);
+        xfree(ptrs);
+        return;
+      }
+    }
+
+    // Exactly three items and all of them Numbers: the loop above must have
+    // consumed the whole inner list without stopping early.
+    if (i != 3 || lili != NULL) {
+      semsg(_(e_list_item_nr_does_not_contain_3_numbers), item);
+      xfree(ptrs);
+      return;
+    }
+
+    ptrs[item++] = li_l;
+  });
+
+  // Sort the list on the first number.
+  qsort((void *)ptrs, n_items, sizeof(const list_T *), tv_nr_compare);
+
+  cw_interval_T *table = xmalloc(sizeof(cw_interval_T) * n_items);
+
+  // Store the items in the new table, walking them in sorted order so that
+  // the overlap check only has to look at the preceding range.
+  for (size_t idx = 0; idx < n_items; idx++) {
+    const list_T *const li_l = ptrs[idx];
+    const listitem_T *lili = tv_list_first(li_l);
+    const varnumber_T n1 = TV_LIST_ITEM_TV(lili)->vval.v_number;
+    if (idx > 0 && n1 <= table[idx - 1].last) {
+      semsg(_(e_overlapping_ranges_for_nr), (long)n1);
+      xfree(ptrs);
+      xfree(table);
+      return;
+    }
+    table[idx].first = (long)n1;
+    lili = TV_LIST_ITEM_NEXT(li_l, lili);
+    table[idx].last = (long)TV_LIST_ITEM_TV(lili)->vval.v_number;
+    lili = TV_LIST_ITEM_NEXT(li_l, lili);
+    table[idx].width = (char)TV_LIST_ITEM_TV(lili)->vval.v_number;
+  }
+
+  xfree(ptrs);
+  xfree(cw_table);
+  cw_table = table;
+  cw_table_size = n_items;
+}
diff --git a/src/nvim/mbyte.h b/src/nvim/mbyte.h
index ffa8411675..2a9afcbd03 100644
--- a/src/nvim/mbyte.h
+++ b/src/nvim/mbyte.h
@@ -5,6 +5,7 @@
 #include <stdint.h>
 #include <string.h>
 
+#include "nvim/eval/typval.h"
 #include "nvim/func_attr.h"
 #include "nvim/mbyte_defs.h"
 #include "nvim/os/os_defs.h"  // For indirect
diff --git a/src/nvim/testdir/test_utf8.vim b/src/nvim/testdir/test_utf8.vim
index 9b010a5dbc..c5dfd85e5e 100644
--- a/src/nvim/testdir/test_utf8.vim
+++ b/src/nvim/testdir/test_utf8.vim
@@ -140,6 +140,41 @@ func Test_list2str_str2list_latin1()
   call assert_equal(s, sres)
 endfunc
 
+func Test_setcellwidths()
+  call setcellwidths([
+        \ [0x1330, 0x1330, 2],
+        \ [0x1337, 0x1339, 2],
+        \ [9999, 10000, 1],
+        \])
+
+  call assert_equal(2, strwidth("\u1330"))
+  call assert_equal(1, strwidth("\u1336"))
+  call assert_equal(2, strwidth("\u1337"))
+  call assert_equal(2, strwidth("\u1339"))
+  call assert_equal(1, strwidth("\u133a"))
+
+  call setcellwidths([])
+
+  call assert_fails('call setcellwidths(1)', 'E714:')
+
+  call assert_fails('call setcellwidths([1, 2, 0])', 'E1109:')
+
+  call assert_fails('call setcellwidths([[0x101]])', 'E1110:')
+  call assert_fails('call setcellwidths([[0x101, 0x102]])', 'E1110:')
+  call assert_fails('call setcellwidths([[0x101, 0x102, 1, 4]])', 'E1110:')
+  call assert_fails('call setcellwidths([["a"]])', 'E1110:')
+
+  call assert_fails('call setcellwidths([[0x102, 0x101, 1]])', 'E1111:')
+
+  call assert_fails('call setcellwidths([[0x101, 0x102, 0]])', 'E1112:')
+  call assert_fails('call setcellwidths([[0x101, 0x102, 3]])', 'E1112:')
+
+  call assert_fails('call setcellwidths([[0x111, 0x122, 1], [0x115, 0x116, 2]])', 'E1113:')
+  call assert_fails('call setcellwidths([[0x111, 0x122, 1], [0x122, 0x123, 2]])', 'E1113:')
+
+  call assert_fails('call setcellwidths([[0x33, 0x44, 2]])', 'E1114:')
+endfunc
+
 func Test_print_overlong()
   " Text with more composing characters than MB_MAXBYTES.
   new