aboutsummaryrefslogtreecommitdiff
path: root/src/nvim/map.c
diff options
context:
space:
mode:
authorbfredl <bjorn.linse@gmail.com>2023-05-17 16:08:06 +0200
committerbfredl <bjorn.linse@gmail.com>2023-09-08 12:48:46 +0200
commit5970157e1d22fd5e05ae5d3bd949f807fb7a744c (patch)
tree59bd8e97d87fa5da7cc95f290f47b93eaf8dd953 /src/nvim/map.c
parent6a8b48e24cbe070846dd1d234553b3fdeb19460e (diff)
downloadrneovim-5970157e1d22fd5e05ae5d3bd949f807fb7a744c.tar.gz
rneovim-5970157e1d22fd5e05ae5d3bd949f807fb7a744c.tar.bz2
rneovim-5970157e1d22fd5e05ae5d3bd949f807fb7a744c.zip
refactor(map): enhanced implementation, Clean Codeā„¢, etc etc
This involves two redesigns of the map.c implementations: 1. Change of macro style and code organization The old khash.h and map.c implementation used huge #define blocks with a lot of backslash line continuations. This instead uses the "implementation file" .c.h pattern. Such a file is meant to be included multiple times, with different macros set prior to inclusion as parameters. we already use this pattern e.g. for eval/typval_encode.c.h to implement different typval encoders reusing a similar structure. We can structure this code into two parts. one that only depends on key type and is enough to implement sets, and one which depends on both key and value to implement maps (as a wrapper around sets, with an added value[] array) 2. Separate the main hash buckets from the key / value arrays Change the hack buckets to only contain an index into separate key / value arrays This is a common pattern in modern, state of the art hashmap implementations. Even though this leads to one more allocated array, it is this often is a net reduction of memory consumption. Consider key+value consuming at least 12 bytes per pair. On average, we will have twice as many buckets per item. Thus old implementation: 2*12 = 24 bytes per item New implementation 1*12 + 2*4 = 20 bytes per item And the difference gets bigger with larger items. One might think we have pulled a fast one here, as wouldn't the average size of the new key/value arrays be 1.5 slots per items due to amortized grows? But remember, these arrays are fully dense, and thus the accessed memory, measured in _cache lines_, the unit which actually matters, will be the fully used memory but just rounded up to the nearest cache line boundary. This has some other interesting properties, such as an insert-only set/map will be fully ordered by insert only. Preserving this ordering in face of deletions is more tricky tho. As we currently don't use ordered maps, the "delete" operation maintains compactness of the item arrays in the simplest way by breaking the ordering. It would be possible to implement an order-preserving delete although at some cost, like allowing the items array to become non-dense until the next rehash. Finally, in face of these two major changes, all code used in khash.h has been integrated into map.c and friends. Given the heavy edits it makes no sense to "layer" the code into a vendored and a wrapper part. Rather, the layered cake follows the specialization depth: code shared for all maps, code specialized to a key type (and its equivalence relation), and finally code specialized to value+key type.
Diffstat (limited to 'src/nvim/map.c')
-rw-r--r--src/nvim/map.c258
1 files changed, 137 insertions, 121 deletions
diff --git a/src/nvim/map.c b/src/nvim/map.c
index 4c8506f468..e2c6443245 100644
--- a/src/nvim/map.c
+++ b/src/nvim/map.c
@@ -1,122 +1,62 @@
// This is an open source non-commercial project. Dear PVS-Studio, please check
// it. PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com
+// map.c: Hash maps and sets
//
-// map.c: khash.h wrapper
+// parts of the implementation derived from khash.h as part of klib (MIT license)
//
// NOTE: Callers must manage memory (allocate) for keys and values.
-// khash.h does not make its own copy of the key or value.
-//
+// Map and Set does not make its own copy of the key or value.
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include "auto/config.h"
-#include "klib/khash.h"
#include "nvim/gettext.h"
#include "nvim/map.h"
#include "nvim/memory.h"
-#define cstr_t_hash kh_str_hash_func
-#define cstr_t_eq kh_str_hash_equal
-#define uint64_t_hash kh_int64_hash_func
-#define uint64_t_eq kh_int64_hash_equal
-#define uint32_t_hash kh_int_hash_func
-#define uint32_t_eq kh_int_hash_equal
-#define int_hash kh_int_hash_func
-#define int_eq kh_int_hash_equal
-#define handle_T_hash kh_int_hash_func
-#define handle_T_eq kh_int_hash_equal
+#define equal_simple(x, y) ((x) == (y))
+
+#define hash_uint64_t(key) (uint32_t)((key) >> 33^(key)^(key) << 11)
+#define equal_uint64_t equal_simple
+#define hash_uint32_t(x) (x)
+#define equal_uint32_t equal_simple
+#define hash_int(x) hash_uint32_t((uint32_t)(x))
+#define equal_int equal_simple
#if defined(ARCH_64)
-# define ptr_t_hash(key) uint64_t_hash((uint64_t)(key))
-# define ptr_t_eq(a, b) uint64_t_eq((uint64_t)(a), (uint64_t)(b))
+# define hash_ptr_t(key) hash_uint64_t((uint64_t)(key))
+# define equal_ptr_t(a, b) equal_uint64_t((uint64_t)(a), (uint64_t)(b))
#elif defined(ARCH_32)
-# define ptr_t_hash(key) uint32_t_hash((uint32_t)(key))
-# define ptr_t_eq(a, b) uint32_t_eq((uint32_t)(a), (uint32_t)(b))
+# define hash_ptr_t(key) hash_uint32_t((uint32_t)(key))
+# define equal_ptr_t(a, b) equal_uint32_t((uint32_t)(a), (uint32_t)(b))
#endif
-#define INITIALIZER(T, U) T##_##U##_initializer
-#define INITIALIZER_DECLARE(T, U, ...) const U INITIALIZER(T, U) = __VA_ARGS__
-#define DEFAULT_INITIALIZER { 0 }
-#define SSIZE_INITIALIZER { -1 }
-
-#define KEY_IMPL(T) \
- __KHASH_IMPL(T, , T, T##_hash, T##_eq) \
-
-#define MAP_IMPL(T, U, ...) \
- INITIALIZER_DECLARE(T, U, __VA_ARGS__); \
- U map_##T##_##U##_get(Map(T, U) *map, T key) \
- { \
- khiter_t k; \
- if ((k = kh_get(T, &map->table, key)) == kh_end(&map->table)) { \
- return INITIALIZER(T, U); \
- } \
- return kh_val(U, &map->table, k); \
- } \
- U map_##T##_##U##_put(Map(T, U) *map, T key, U value) \
- { \
- STATIC_ASSERT(sizeof(U) <= KHASH_MAX_VAL_SIZE, "increase KHASH_MAX_VAL_SIZE"); \
- int ret; \
- U rv = INITIALIZER(T, U); \
- khiter_t k = kh_put(T, &map->table, key, &ret, sizeof(U)); \
- if (!ret) { \
- rv = kh_val(U, &map->table, k); \
- } \
- kh_val(U, &map->table, k) = value; \
- return rv; \
- } \
- U *map_##T##_##U##_ref(Map(T, U) *map, T key, T **key_alloc) \
- { \
- khiter_t k = kh_get(T, &map->table, key); \
- if (k == kh_end(&map->table)) { \
- return NULL; \
- } \
- if (key_alloc) { \
- *key_alloc = &kh_key(&map->table, k); \
- } \
- return &kh_val(U, &map->table, k); \
- } \
- U *map_##T##_##U##_put_ref(Map(T, U) *map, T key, T **key_alloc, bool *new_item) \
- { \
- int ret; \
- khiter_t k = kh_put(T, &map->table, key, &ret, sizeof(U)); \
- if (ret) { \
- kh_val(U, &map->table, k) = INITIALIZER(T, U); \
- } \
- if (new_item) { \
- *new_item = (bool)ret; \
- } \
- if (key_alloc) { \
- *key_alloc = &kh_key(&map->table, k); \
- } \
- return &kh_val(U, &map->table, k); \
- } \
- U map_##T##_##U##_del(Map(T, U) *map, T key, T *key_alloc) \
- { \
- U rv = INITIALIZER(T, U); \
- khiter_t k; \
- if ((k = kh_get(T, &map->table, key)) != kh_end(&map->table)) { \
- rv = kh_val(U, &map->table, k); \
- if (key_alloc) { \
- *key_alloc = kh_key(&map->table, k); \
- } \
- kh_del(T, &map->table, k); \
- } \
- return rv; \
+static inline uint32_t hash_cstr_t(const char *s)
+{
+ uint32_t h = 0;
+ for (size_t i = 0; s[i]; i++) {
+ h = (h << 5) - h + (uint8_t)s[i];
}
+ return h;
+}
+
+#define equal_cstr_t strequal
-static inline khint_t String_hash(String s)
+// when used as a key, String doesn't need to be NUL terminated,
+// and can also contain embedded NUL:s as part of the data.
+static inline uint32_t hash_String(String s)
{
- khint_t h = 0;
- for (size_t i = 0; i < s.size && s.data[i]; i++) {
+ uint32_t h = 0;
+ for (size_t i = 0; i < s.size; i++) {
h = (h << 5) - h + (uint8_t)s.data[i];
}
return h;
}
-static inline bool String_eq(String a, String b)
+static inline bool equal_String(String a, String b)
{
if (a.size != b.size) {
return false;
@@ -124,61 +64,137 @@ static inline bool String_eq(String a, String b)
return memcmp(a.data, b.data, a.size) == 0;
}
-static inline khint_t HlEntry_hash(HlEntry ae)
+static inline uint32_t hash_HlEntry(HlEntry ae)
{
const uint8_t *data = (const uint8_t *)&ae;
- khint_t h = 0;
+ uint32_t h = 0;
for (size_t i = 0; i < sizeof(ae); i++) {
h = (h << 5) - h + data[i];
}
return h;
}
-static inline bool HlEntry_eq(HlEntry ae1, HlEntry ae2)
+static inline bool equal_HlEntry(HlEntry ae1, HlEntry ae2)
{
return memcmp(&ae1, &ae2, sizeof(ae1)) == 0;
}
-static inline khint_t ColorKey_hash(ColorKey ae)
+static inline uint32_t hash_ColorKey(ColorKey ae)
{
const uint8_t *data = (const uint8_t *)&ae;
- khint_t h = 0;
+ uint32_t h = 0;
for (size_t i = 0; i < sizeof(ae); i++) {
h = (h << 5) - h + data[i];
}
return h;
}
-static inline bool ColorKey_eq(ColorKey ae1, ColorKey ae2)
+static inline bool equal_ColorKey(ColorKey ae1, ColorKey ae2)
{
return memcmp(&ae1, &ae2, sizeof(ae1)) == 0;
}
-KEY_IMPL(int)
-KEY_IMPL(cstr_t)
-KEY_IMPL(ptr_t)
-KEY_IMPL(uint64_t)
-KEY_IMPL(uint32_t)
-KEY_IMPL(String)
-KEY_IMPL(HlEntry)
-KEY_IMPL(ColorKey)
-
-MAP_IMPL(int, int, DEFAULT_INITIALIZER)
-MAP_IMPL(int, ptr_t, DEFAULT_INITIALIZER)
-MAP_IMPL(int, cstr_t, DEFAULT_INITIALIZER)
-MAP_IMPL(cstr_t, ptr_t, DEFAULT_INITIALIZER)
-MAP_IMPL(cstr_t, int, DEFAULT_INITIALIZER)
-MAP_IMPL(ptr_t, ptr_t, DEFAULT_INITIALIZER)
-MAP_IMPL(uint32_t, ptr_t, DEFAULT_INITIALIZER)
-MAP_IMPL(uint64_t, ptr_t, DEFAULT_INITIALIZER)
-MAP_IMPL(uint64_t, ssize_t, SSIZE_INITIALIZER)
-MAP_IMPL(uint64_t, uint64_t, DEFAULT_INITIALIZER)
-MAP_IMPL(uint32_t, uint32_t, DEFAULT_INITIALIZER)
-MAP_IMPL(HlEntry, int, DEFAULT_INITIALIZER)
-MAP_IMPL(String, handle_T, 0)
-MAP_IMPL(String, int, DEFAULT_INITIALIZER)
-MAP_IMPL(int, String, DEFAULT_INITIALIZER)
-MAP_IMPL(ColorKey, ColorItem, COLOR_ITEM_INITIALIZER)
+// TODO(bfredl): this could be _less_ for the h->hash part as this is now small (4 bytes per value)
+#define UPPER_FILL 0.77
+
+#define roundup32(x) (--(x), (x)|= (x)>>1, (x)|= (x)>>2, (x)|= (x)>>4, (x)|= (x)>>8, \
+ (x)|= (x)>>16, ++(x))
+
+// h->hash must either be NULL or an already valid pointer
+void mh_realloc(MapHash *h, uint32_t n_min_buckets)
+{
+ xfree(h->hash);
+ uint32_t n_buckets = n_min_buckets < 16 ? 16 : n_min_buckets;
+ roundup32(n_buckets);
+ // sets all buckets to EMPTY
+ h->hash = xcalloc(n_buckets, sizeof *h->hash);
+ h->n_occupied = h->size = 0;
+ h->n_buckets = n_buckets;
+ h->upper_bound = (uint32_t)(h->n_buckets * UPPER_FILL + 0.5);
+}
+
+void mh_clear(MapHash *h)
+{
+ if (h->hash) {
+ memset(h->hash, 0, h->n_buckets * sizeof(*h->hash));
+ h->size = h->n_occupied = 0;
+ h->n_keys = 0;
+ }
+}
+
+#define KEY_NAME(x) x##int
+#include "nvim/map_key_impl.c.h"
+#define VAL_NAME(x) quasiquote(x, int)
+#include "nvim/map_value_impl.c.h"
+#undef VAL_NAME
+#define VAL_NAME(x) quasiquote(x, ptr_t)
+#include "nvim/map_value_impl.c.h"
+#undef VAL_NAME
+#define VAL_NAME(x) quasiquote(x, String)
+#include "nvim/map_value_impl.c.h"
+#undef VAL_NAME
+#undef KEY_NAME
+
+#define KEY_NAME(x) x##ptr_t
+#include "nvim/map_key_impl.c.h"
+#define VAL_NAME(x) quasiquote(x, ptr_t)
+#include "nvim/map_value_impl.c.h"
+#undef VAL_NAME
+#undef KEY_NAME
+
+#define KEY_NAME(x) x##cstr_t
+#include "nvim/map_key_impl.c.h"
+#define VAL_NAME(x) quasiquote(x, ptr_t)
+#include "nvim/map_value_impl.c.h"
+#undef VAL_NAME
+#define VAL_NAME(x) quasiquote(x, int)
+#include "nvim/map_value_impl.c.h"
+#undef VAL_NAME
+#undef KEY_NAME
+
+#define KEY_NAME(x) x##String
+#include "nvim/map_key_impl.c.h"
+#define VAL_NAME(x) quasiquote(x, int)
+#include "nvim/map_value_impl.c.h"
+#undef VAL_NAME
+#undef KEY_NAME
+
+#define KEY_NAME(x) x##uint32_t
+#include "nvim/map_key_impl.c.h"
+#define VAL_NAME(x) quasiquote(x, ptr_t)
+#include "nvim/map_value_impl.c.h"
+#undef VAL_NAME
+#define VAL_NAME(x) quasiquote(x, uint32_t)
+#include "nvim/map_value_impl.c.h"
+#undef VAL_NAME
+#undef KEY_NAME
+
+#define KEY_NAME(x) x##uint64_t
+#include "nvim/map_key_impl.c.h"
+#define VAL_NAME(x) quasiquote(x, ptr_t)
+#include "nvim/map_value_impl.c.h"
+#undef VAL_NAME
+#define VAL_NAME(x) quasiquote(x, ssize_t)
+#include "nvim/map_value_impl.c.h"
+#undef VAL_NAME
+#define VAL_NAME(x) quasiquote(x, uint64_t)
+#include "nvim/map_value_impl.c.h"
+#undef VAL_NAME
+#undef KEY_NAME
+
+#define KEY_NAME(x) x##HlEntry
+#include "nvim/map_key_impl.c.h"
+#define VAL_NAME(x) quasiquote(x, int)
+#include "nvim/map_value_impl.c.h"
+#undef VAL_NAME
+#undef KEY_NAME
+
+#define KEY_NAME(x) x##ColorKey
+#include "nvim/map_key_impl.c.h"
+#define VAL_NAME(x) quasiquote(x, ColorItem)
+#include "nvim/map_value_impl.c.h"
+#undef VAL_NAME
+#undef KEY_NAME
/// Deletes a key:value pair from a string:pointer map, and frees the
/// storage of both key and value.