refactor(grid): change schar_T representation to be more compact

Previously, a screen cell would occupy 28+4=32 bytes per cell, as we always made space for up to MAX_MCO+1 codepoints in a cell. As an example, even a pretty modest 50*80 screen would consume 50*80*2*32 = 256000 bytes, i.e. a quarter megabyte, with the factor of two due to the TUI side buffer, and even more when using msg_grid and/or ext_multigrid. This instead stores a 4-byte union of either: - a valid UTF-8 sequence up to 4 bytes - an escape char which is invalid UTF-8 (0xFF) plus a 24-bit index to a glyph cache. This avoids allocating space for huge composed glyphs _upfront_, while still keeping rendering such glyphs reasonably fast (1 hash table lookup + one plain index lookup). If the same large glyphs are used repeatedly on the screen, this is still a net reduction of memory/cache consumption. The only case which really gets worse is if you blast the screen full with crazy emojis and zalgo text, and even this case only leads to 4 extra bytes per char. When only <= 4-byte glyphs are used, plus the 4-byte attribute code, i.e. 8 bytes in total, there is a factor of four reduction of memory use. This is memory which will be quite hot in cache, as the screen buffer is scanned over in win_line() buffer text drawing. A slight complication is that the representation depends on host byte order. I've tested this manually by compiling and running this in qemu-s390x and it works fine. We might add a qemu based solution to CI at some point.
author: bfredl <bjorn.linse@gmail.com> 2023-09-13 13:39:18 +0200
committer: bfredl <bjorn.linse@gmail.com> 2023-09-19 11:25:31 +0200
commit: 8da986ea877b07a5eb117446f410f2a7fc8cd9cb (patch)
tree: 2875a09e73c37bcb2b65d92093a2092d008869e0 /src/nvim/map_glyph_cache.c
parent: 46402c16c0c38701469c52fb28d16f2483cc7a72 (diff)
download: rneovim-8da986ea877b07a5eb117446f410f2a7fc8cd9cb.tar.gz
rneovim-8da986ea877b07a5eb117446f410f2a7fc8cd9cb.tar.bz2
rneovim-8da986ea877b07a5eb117446f410f2a7fc8cd9cb.zip
1 files changed, 102 insertions, 0 deletions
diff --git a/src/nvim/map_glyph_cache.c b/src/nvim/map_glyph_cache.c
new file mode 100644
index 0000000000..6dcbfe0532
--- /dev/null
+++ b/src/nvim/map_glyph_cache.c
@@ -0,0 +1,102 @@
+// This is an open source non-commercial project. Dear PVS-Studio, please check
+// it. PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com
+
+// Specialized version of Set() where interned strings are stored in a compact,
+// NUL-separated char array.
+// `String key` lookup keys don't need to be NUL terminated, but they
+// must not contain embedded NUL:s. When reading a key from set->keys, they
+// are always NUL terminated, though. Thus, it is enough to store an index into
+// this array, and use strlen(), to retrieve an interned key.
+
+#include "nvim/api/private/helpers.h"
+#include "nvim/map.h"
+
+uint32_t mh_find_bucket_glyph(Set(glyph) *set, String key, bool put)  // find the bucket of "key", or (put) one to insert it at
+{
+  MapHash *h = &set->h;
+  uint32_t step = 0;  // probe increment, bumped before every move
+  uint32_t mask = h->n_buckets - 1;  // NOTE(review): assumes n_buckets is a nonzero power of two -- confirm
+  uint32_t k = hash_String(key);
+  uint32_t i = k & mask;
+  uint32_t last = i;  // starting bucket; "site == last" means no insertion site picked yet
+  uint32_t site = put ? last : MH_TOMBSTONE;  // lookups (!put) report a miss as MH_TOMBSTONE
+  while (!mh_is_empty(h, i)) {
+    if (mh_is_del(h, i)) {
+      if (site == last) {
+        site = i;  // reuse a deleted bucket, unless a site was already picked
+      }
+    } else if (equal_String(cstr_as_string(&set->keys[h->hash[i] - 1]), key)) {
+      return i;  // key already interned: hash[i] - 1 is its offset in set->keys
+    }
+    i = (i + (++step)) & mask;  // increment grows by one per probe
+    if (i == last) {  // probing came back to the starting bucket
+      abort();
+    }
+  }
+  if (site == last) {  // put without a picked site: fall back to the empty bucket found
+    site = i;  // NOTE(review): !put relies on MH_TOMBSTONE never being a valid bucket index
+  }
+  return site;
+}
+
+/// @return offset of "key" in set->keys if it is interned, MH_TOMBSTONE otherwise
+uint32_t mh_get_glyph(Set(glyph) *set, String key)
+{
+  if (set->h.n_buckets == 0) {  // no buckets at all, so nothing can be found
+    return MH_TOMBSTONE;
+  }
+  uint32_t idx = mh_find_bucket_glyph(set, key, false);  // put == false: lookup only
+  return (idx != MH_TOMBSTONE) ? set->h.hash[idx] - 1 : MH_TOMBSTONE;  // hash[] holds offset + 1
+}
+
+void mh_rehash_glyph(Set(glyph) *set)  // rebuild the buckets by walking the NUL terminated keys
+{
+  uint32_t n_entries = 0;  // number of keys; set->h.n_keys is the byte length of set->keys
+  for (uint32_t k = 0; k < set->h.n_keys; k += (uint32_t)strlen(&set->keys[k]) + 1) {
+    uint32_t idx = mh_find_bucket_glyph(set, cstr_as_string(&set->keys[k]), true);
+    if (!mh_is_empty((&set->h), idx)) {  // key collided with a tombstone or a duplicate
+      abort();
+    }
+    set->h.hash[idx] = k + 1;  // offset biased by one
+    n_entries++;
+  }
+  set->h.n_occupied = set->h.size = n_entries;  // one per key, matching the ++ in mh_put_glyph
+}
+
+/// Intern "key": append it to set->keys unless already present. "new" reports which case.
+/// @return offset of the (NUL terminated) key in set->keys
+uint32_t mh_put_glyph(Set(glyph) *set, String key, MHPutStatus *new)
+{
+  MapHash *h = &set->h;
+  // Might rehash ahead of time if "key" already existed. But it was
+  // going to happen soon anyway.
+  if (h->n_occupied >= h->upper_bound) {
+    mh_realloc(h, h->n_buckets + 1);
+    mh_rehash_glyph(set);
+  }
+  uint32_t idx = mh_find_bucket_glyph(set, key, true);
+  if (mh_is_either(h, idx)) {
+    h->size++;
+    h->n_occupied++;
+    uint32_t size = (uint32_t)key.size + 1;  // NUL takes space
+    uint32_t pos = h->n_keys;
+    h->n_keys += size;
+    if (h->n_keys > h->keys_capacity) {
+      // doubling alone may still be too small for a long key: also cover n_keys
+      h->keys_capacity = MAX(MAX(h->keys_capacity * 2, 64), h->n_keys);
+      set->keys = xrealloc(set->keys, h->keys_capacity * sizeof(char));
+      *new = kMHNewKeyRealloc;
+    } else {
+      *new = kMHNewKeyDidFit;
+    }
+    memcpy(&set->keys[pos], key.data, key.size);
+    set->keys[pos + key.size] = NUL;
+    h->hash[idx] = pos + 1;  // offset biased by one; readers subtract it again
+    return pos;
+  } else {
+    *new = kMHExisting;
+    uint32_t pos = h->hash[idx] - 1;
+    assert(equal_String(cstr_as_string(&set->keys[pos]), key));
+    return pos;
+  }
+}
author	bfredl <bjorn.linse@gmail.com>	2023-09-13 13:39:18 +0200
committer	bfredl <bjorn.linse@gmail.com>	2023-09-19 11:25:31 +0200
commit	8da986ea877b07a5eb117446f410f2a7fc8cd9cb (patch)
tree	2875a09e73c37bcb2b65d92093a2092d008869e0 /src/nvim/map_glyph_cache.c
parent	46402c16c0c38701469c52fb28d16f2483cc7a72 (diff)
download	rneovim-8da986ea877b07a5eb117446f410f2a7fc8cd9cb.tar.gz rneovim-8da986ea877b07a5eb117446f410f2a7fc8cd9cb.tar.bz2 rneovim-8da986ea877b07a5eb117446f410f2a7fc8cd9cb.zip