refactor: delete duplicate utf8-functionality

Also remove the British National Replacement Character Set. We keep the DEC Special Graphics and ASCII character sets despite them not being Unicode, as some old software such as calcurse still relies on this functionality. References: - https://github.com/neovim/neovim/pull/31934#discussion_r1911046426 - https://en.wikipedia.org/wiki/DEC_Special_Graphics - https://vt100.net/docs/vt220-rm/chapter2.html#S2.4.3
author: dundargoc <gocdundar@gmail.com> 2025-01-09 17:28:27 +0100
committer: dundargoc <33953936+dundargoc@users.noreply.github.com> 2025-01-13 13:16:41 +0100
commit: 47866cd8d20c62afa8a3c3929d3aada2db9162f5 (patch)
tree: 77318a1cd5cfacb5f3e703bd98424abb2c1fcdf1
parent: 0631492f9c8044a378dc2a17ea257badfbda6d15 (diff)
download: rneovim-47866cd8d20c62afa8a3c3929d3aada2db9162f5.tar.gz
rneovim-47866cd8d20c62afa8a3c3929d3aada2db9162f5.tar.bz2
rneovim-47866cd8d20c62afa8a3c3929d3aada2db9162f5.zip
4 files changed, 15 insertions, 50 deletions
diff --git a/src/nvim/mbyte_defs.h b/src/nvim/mbyte_defs.h
index e308a81a5d..8d5ff2a8c1 100644
--- a/src/nvim/mbyte_defs.h
+++ b/src/nvim/mbyte_defs.h
@@ -74,3 +74,5 @@ typedef struct {
 } CharBoundsOff;
 
 typedef utf8proc_int32_t GraphemeState;
+
+enum { UNICODE_INVALID = 0xFFFD, };
diff --git a/src/nvim/tui/termkey/termkey.c b/src/nvim/tui/termkey/termkey.c
index 8c4a91e736..eabde2f9f7 100644
--- a/src/nvim/tui/termkey/termkey.c
+++ b/src/nvim/tui/termkey/termkey.c
@@ -634,40 +634,13 @@ static void eat_bytes(TermKey *tk, size_t count)
   tk->buffcount -= count;
 }
 
-// TODO(dundargoc): we should be able to replace this with utf_char2bytes from mbyte.c
 int fill_utf8(int codepoint, char *str)
 {
-  int nbytes = utf_char2len(codepoint);
-
+  int nbytes = utf_char2bytes(codepoint, str);
   str[nbytes] = 0;
-
-  // This is easier done backwards
-  int b = nbytes;
-  while (b > 1) {
-    b--;
-    str[b] = (char)0x80 | (codepoint & 0x3f);
-    codepoint >>= 6;
-  }
-
-  switch (nbytes) {
-  case 1:
-    str[0] = (codepoint & 0x7f); break;
-  case 2:
-    str[0] = (char)0xc0 | (codepoint & 0x1f); break;
-  case 3:
-    str[0] = (char)0xe0 | (codepoint & 0x0f); break;
-  case 4:
-    str[0] = (char)0xf0 | (codepoint & 0x07); break;
-  case 5:
-    str[0] = (char)0xf8 | (codepoint & 0x03); break;
-  case 6:
-    str[0] = (char)0xfc | (codepoint & 0x01); break;
-  }
-
   return nbytes;
 }
 
-#define UTF8_INVALID 0xFFFD
 static TermKeyResult parse_utf8(const unsigned char *bytes, size_t len, int *cp, size_t *nbytep)
 {
   unsigned nbytes;
@@ -681,7 +654,7 @@ static TermKeyResult parse_utf8(const unsigned char *bytes, size_t len, int *cp,
     return TERMKEY_RES_KEY;
   } else if (b0 < 0xc0) {
     // Starts with a continuation byte - that's not right
-    *cp = UTF8_INVALID;
+    *cp = UNICODE_INVALID;
     *nbytep = 1;
     return TERMKEY_RES_KEY;
   } else if (b0 < 0xe0) {
@@ -700,7 +673,7 @@ static TermKeyResult parse_utf8(const unsigned char *bytes, size_t len, int *cp,
     nbytes = 6;
     *cp = b0 & 0x01;
   } else {
-    *cp = UTF8_INVALID;
+    *cp = UNICODE_INVALID;
     *nbytep = 1;
     return TERMKEY_RES_KEY;
   }
@@ -714,7 +687,7 @@ static TermKeyResult parse_utf8(const unsigned char *bytes, size_t len, int *cp,
 
     cb = bytes[b];
     if (cb < 0x80 || cb >= 0xc0) {
-      *cp = UTF8_INVALID;
+      *cp = UNICODE_INVALID;
       *nbytep = b;
       return TERMKEY_RES_KEY;
     }
@@ -725,14 +698,14 @@ static TermKeyResult parse_utf8(const unsigned char *bytes, size_t len, int *cp,
 
   // Check for overlong sequences
   if ((int)nbytes > utf_char2len(*cp)) {
-    *cp = UTF8_INVALID;
+    *cp = UNICODE_INVALID;
   }
 
   // Check for UTF-16 surrogates or invalid *cps
   if ((*cp >= 0xD800 && *cp <= 0xDFFF)
       || *cp == 0xFFFE
       || *cp == 0xFFFF) {
-    *cp = UTF8_INVALID;
+    *cp = UNICODE_INVALID;
   }
 
   *nbytep = nbytes;
@@ -962,9 +935,9 @@ static TermKeyResult peekkey_simple(TermKey *tk, TermKeyKey *key, int force, siz
     if (res == TERMKEY_RES_AGAIN && force) {
       // There weren't enough bytes for a complete UTF-8 sequence but caller
       // demands an answer. About the best thing we can do here is eat as many
-      // bytes as we have, and emit a UTF8_INVALID. If the remaining bytes
+      // bytes as we have, and emit a UNICODE_INVALID. If the remaining bytes
       // arrive later, they'll be invalid too.
-      codepoint = UTF8_INVALID;
+      codepoint = UNICODE_INVALID;
       *nbytep = tk->buffcount;
       res = TERMKEY_RES_KEY;
     }
diff --git a/src/nvim/vterm/encoding.c b/src/nvim/vterm/encoding.c
index cc3208cfa2..f9061e8e50 100644
--- a/src/nvim/vterm/encoding.c
+++ b/src/nvim/vterm/encoding.c
@@ -210,6 +210,7 @@ static void decode_table(VTermEncoding *enc, void *data, uint32_t cp[], int *cpi
   }
 }
 
+// https://en.wikipedia.org/wiki/DEC_Special_Graphics
 static const struct StaticTableEncoding encoding_DECdrawing = {
   { .decode = &decode_table },
   {
@@ -247,13 +248,6 @@ static const struct StaticTableEncoding encoding_DECdrawing = {
   }
 };
 
-static const struct StaticTableEncoding encoding_uk = {
-  { .decode = &decode_table },
-  {
-    [0x23] = 0x00a3,  // £
-  }
-};
-
 static struct {
   VTermEncodingType type;
   char designation;
@@ -262,7 +256,6 @@ static struct {
 encodings[] = {
   { ENC_UTF8,      'u', &encoding_utf8 },
   { ENC_SINGLE_94, '0', (VTermEncoding *)&encoding_DECdrawing },
-  { ENC_SINGLE_94, 'A', (VTermEncoding *)&encoding_uk },
   { ENC_SINGLE_94, 'B', &encoding_usascii },
   { 0 },
 };
diff --git a/test/unit/vterm_spec.lua b/test/unit/vterm_spec.lua
index 0bf4bf70f8..6ff3c18d2a 100644
--- a/test/unit/vterm_spec.lua
+++ b/test/unit/vterm_spec.lua
@@ -28,6 +28,7 @@ local bit = require('bit')
 --- @field parser_sos function
 --- @field parser_text function
 --- @field print_color function
+--- @field schar_get fun(any, any):integer
 --- @field screen_sb_clear function
 --- @field screen_sb_popline function
 --- @field screen_sb_pushline function
@@ -43,6 +44,8 @@ local bit = require('bit')
 --- @field state_setpenattr function
 --- @field state_settermprop function
 --- @field term_output function
+--- @field utf_ptr2char fun(any):integer
+--- @field utf_ptr2len fun(any):integer
 --- @field vterm_input_write function
 --- @field vterm_keyboard_end_paste function
 --- @field vterm_keyboard_key function
@@ -360,7 +363,7 @@ local function screen_cell(row, col, expected, screen)
   pos['row'] = row
   pos['col'] = col
 
-  local cell = t.ffi.new('VTermScreenCell')
+  local cell = t.ffi.new('VTermScreenCell') ---@type any
   vterm.vterm_screen_get_cell(screen, pos, cell)
 
   local buf = t.ffi.new('unsigned char[32]')
@@ -1705,12 +1708,6 @@ putglyph 1f3f4,200d,2620,fe0f 2 0,4]])
     push('#', vt)
     expect('putglyph 23 1 0,0')
 
-    -- Designate G0=UK
-    reset(state, nil)
-    push('\x1b(A', vt)
-    push('#', vt)
-    expect('putglyph a3 1 0,0')
-
     -- Designate G0=DEC drawing
     reset(state, nil)
     push('\x1b(0', vt)
author	dundargoc <gocdundar@gmail.com>	2025-01-09 17:28:27 +0100
committer	dundargoc <33953936+dundargoc@users.noreply.github.com>	2025-01-13 13:16:41 +0100
commit	47866cd8d20c62afa8a3c3929d3aada2db9162f5 (patch)
tree	77318a1cd5cfacb5f3e703bd98424abb2c1fcdf1
parent	0631492f9c8044a378dc2a17ea257badfbda6d15 (diff)
download	rneovim-47866cd8d20c62afa8a3c3929d3aada2db9162f5.tar.gz rneovim-47866cd8d20c62afa8a3c3929d3aada2db9162f5.tar.bz2 rneovim-47866cd8d20c62afa8a3c3929d3aada2db9162f5.zip