From 5970157e1d22fd5e05ae5d3bd949f807fb7a744c Mon Sep 17 00:00:00 2001 From: bfredl Date: Wed, 17 May 2023 16:08:06 +0200 Subject: refactor(map): enhanced implementation, Clean Code™, etc etc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This involves two redesigns of the map.c implementations: 1. Change of macro style and code organization The old khash.h and map.c implementation used huge #define blocks with a lot of backslash line continuations. This instead uses the "implementation file" .c.h pattern. Such a file is meant to be included multiple times, with different macros set prior to inclusion as parameters. We already use this pattern e.g. for eval/typval_encode.c.h to implement different typval encoders reusing a similar structure. We can structure this code into two parts: one that only depends on the key type and is enough to implement sets, and one which depends on both key and value to implement maps (as a wrapper around sets, with an added value[] array) 2. Separate the main hash buckets from the key / value arrays Change the hash buckets to only contain an index into separate key / value arrays This is a common pattern in modern, state-of-the-art hashmap implementations. Even though this leads to one more allocated array, it is often a net reduction of memory consumption. Consider key+value consuming at least 12 bytes per pair. On average, we will have twice as many buckets per item. Thus old implementation: 2*12 = 24 bytes per item New implementation 1*12 + 2*4 = 20 bytes per item And the difference gets bigger with larger items. One might think we have pulled a fast one here, as wouldn't the average size of the new key/value arrays be 1.5 slots per item due to amortized growth? But remember, these arrays are fully dense, and thus the accessed memory, measured in _cache lines_, the unit which actually matters, will be the fully used memory but just rounded up to the nearest cache line boundary.
This has some other interesting properties, such as an insert-only set/map will be fully ordered by insertion order. Preserving this ordering in face of deletions is more tricky though. As we currently don't use ordered maps, the "delete" operation maintains compactness of the item arrays in the simplest way by breaking the ordering. It would be possible to implement an order-preserving delete although at some cost, like allowing the items array to become non-dense until the next rehash. Finally, in face of these two major changes, all code used in khash.h has been integrated into map.c and friends. Given the heavy edits it makes no sense to "layer" the code into a vendored and a wrapper part. Rather, the layered cake follows the specialization depth: code shared for all maps, code specialized to a key type (and its equivalence relation), and finally code specialized to value+key type. --- src/nvim/map_value_impl.c.h | 64 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 src/nvim/map_value_impl.c.h (limited to 'src/nvim/map_value_impl.c.h') diff --git a/src/nvim/map_value_impl.c.h b/src/nvim/map_value_impl.c.h new file mode 100644 index 0000000000..fffda280f0 --- /dev/null +++ b/src/nvim/map_value_impl.c.h @@ -0,0 +1,64 @@ +#include "nvim/assert.h" +#include "nvim/map.h" + +#if !defined(KEY_NAME) || !defined(VAL_NAME) +// Don't error out. 
it is nice to type-check the file in isolation, in clangd or otherwise +# define KEY_NAME(x) x##int +# define VAL_NAME(x) quasiquote(x, ptr_t) +#endif + +#define MAP_NAME(x) VAL_NAME(KEY_NAME(x)) +#define MAP_TYPE MAP_NAME(Map_) +#define KEY_TYPE KEY_NAME() +#define VALUE_TYPE VAL_NAME() +#define INITIALIZER VAL_NAME(value_init_) + +VALUE_TYPE *MAP_NAME(map_ref_)(MAP_TYPE *map, KEY_TYPE key, KEY_TYPE **key_alloc) +{ + uint32_t k = KEY_NAME(mh_get_)(&map->set, key); + if (k == MH_TOMBSTONE) { + return NULL; + } + if (key_alloc) { + *key_alloc = &map->set.keys[k]; + } + return &map->values[k]; +} + +VALUE_TYPE *MAP_NAME(map_put_ref_)(MAP_TYPE *map, KEY_TYPE key, KEY_TYPE **key_alloc, + bool *new_item) +{ + MhPutStatus status; + uint32_t k = KEY_NAME(mh_put_)(&map->set, key, &status); + if (status != kMHExisting) { + if (status == kMHNewKeyRealloc) { + map->values = xrealloc(map->values, map->set.h.keys_capacity * sizeof(VALUE_TYPE)); + } + map->values[k] = INITIALIZER; + } + if (new_item) { + *new_item = (status != kMHExisting); + } + if (key_alloc) { + *key_alloc = &map->set.keys[k]; + } + return &map->values[k]; +} + +VALUE_TYPE MAP_NAME(map_del_)(MAP_TYPE *map, KEY_TYPE key, KEY_TYPE *key_alloc) +{ + VALUE_TYPE rv = INITIALIZER; + uint32_t k = KEY_NAME(mh_delete_)(&map->set, &key); + if (k == MH_TOMBSTONE) { + return rv; + } + + if (key_alloc) { + *key_alloc = key; + } + rv = map->values[k]; + if (k != map->set.h.n_keys) { + map->values[k] = map->values[map->set.h.n_keys]; + } + return rv; +} -- cgit From 8da986ea877b07a5eb117446f410f2a7fc8cd9cb Mon Sep 17 00:00:00 2001 From: bfredl Date: Wed, 13 Sep 2023 13:39:18 +0200 Subject: refactor(grid): change schar_T representation to be more compact Previously, a screen cell would occupy 28+4=32 bytes per cell as we always made space for up to MAX_MCO+1 codepoints in a cell. 
As an example, even a pretty modest 50*80 screen would consume 50*80*2*32 = 256000, i.e. a quarter megabyte With the factor of two due to the TUI side buffer, and even more when using msg_grid and/or ext_multigrid. This instead stores a 4-byte union of either: - a valid UTF-8 sequence up to 4 bytes - an escape char which is invalid UTF-8 (0xFF) plus a 24-bit index to a glyph cache This avoids allocating space for huge composed glyphs _upfront_, while still keeping rendering such glyphs reasonably fast (1 hash table lookup + one plain index lookup). If the same large glyphs are used repeatedly on the screen, this is still a net reduction of memory/cache consumption. The only case which really gets worse is if you blast the screen full with crazy emojis and zalgo text and even this case only leads to 4 extra bytes per char. When only <= 4-byte glyphs are used, plus the 4-byte attribute code, i.e. 8 bytes in total there is a factor of four reduction of memory use. Memory which will be quite hot in cache as the screen buffer is scanned over in win_line() buffer text drawing A slight complication is that the representation depends on host byte order. I've tested this manually by compiling and running this in qemu-s390x and it works fine. We might add a qemu based solution to CI at some point. 
--- src/nvim/map_value_impl.c.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/nvim/map_value_impl.c.h') diff --git a/src/nvim/map_value_impl.c.h b/src/nvim/map_value_impl.c.h index fffda280f0..8f07bd8fa7 100644 --- a/src/nvim/map_value_impl.c.h +++ b/src/nvim/map_value_impl.c.h @@ -28,7 +28,7 @@ VALUE_TYPE *MAP_NAME(map_ref_)(MAP_TYPE *map, KEY_TYPE key, KEY_TYPE **key_alloc VALUE_TYPE *MAP_NAME(map_put_ref_)(MAP_TYPE *map, KEY_TYPE key, KEY_TYPE **key_alloc, bool *new_item) { - MhPutStatus status; + MHPutStatus status; uint32_t k = KEY_NAME(mh_put_)(&map->set, key, &status); if (status != kMHExisting) { if (status == kMHNewKeyRealloc) { -- cgit From 79b6ff28ad1204fbb4199b9092f5c578d88cb28e Mon Sep 17 00:00:00 2001 From: dundargoc Date: Tue, 28 Nov 2023 20:31:00 +0100 Subject: refactor: fix headers with IWYU --- src/nvim/map_value_impl.c.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/nvim/map_value_impl.c.h') diff --git a/src/nvim/map_value_impl.c.h b/src/nvim/map_value_impl.c.h index 8f07bd8fa7..d93856c25d 100644 --- a/src/nvim/map_value_impl.c.h +++ b/src/nvim/map_value_impl.c.h @@ -1,5 +1,5 @@ -#include "nvim/assert.h" -#include "nvim/map.h" +#include "nvim/assert_defs.h" +#include "nvim/map_defs.h" #if !defined(KEY_NAME) || !defined(VAL_NAME) // Don't error out. it is nice to type-check the file in isolation, in clangd or otherwise -- cgit