diff options
author | bfredl <bjorn.linse@gmail.com> | 2023-05-17 16:08:06 +0200 |
---|---|---|
committer | bfredl <bjorn.linse@gmail.com> | 2023-09-08 12:48:46 +0200 |
commit | 5970157e1d22fd5e05ae5d3bd949f807fb7a744c (patch) | |
tree | 59bd8e97d87fa5da7cc95f290f47b93eaf8dd953 /src/nvim/map.h | |
parent | 6a8b48e24cbe070846dd1d234553b3fdeb19460e (diff) | |
download | rneovim-5970157e1d22fd5e05ae5d3bd949f807fb7a744c.tar.gz rneovim-5970157e1d22fd5e05ae5d3bd949f807fb7a744c.tar.bz2 rneovim-5970157e1d22fd5e05ae5d3bd949f807fb7a744c.zip |
refactor(map): enhanced implementation, Clean Codeā¢, etc etc
This involves two redesigns of the map.c implementations:
1. Change of macro style and code organization
The old khash.h and map.c implementation used huge #define blocks with a
lot of backslash line continuations.
This instead uses the "implementation file" .c.h pattern. Such a file is
meant to be included multiple times, with different macros set prior to
inclusion as parameters. we already use this pattern e.g. for
eval/typval_encode.c.h to implement different typval encoders reusing a
similar structure.
We can structure this code into two parts. one that only depends on key
type and is enough to implement sets, and one which depends on both key
and value to implement maps (as a wrapper around sets, with an added
value[] array)
2. Separate the main hash buckets from the key / value arrays
Change the hack buckets to only contain an index into separate key /
value arrays
This is a common pattern in modern, state of the art hashmap
implementations. Even though this leads to one more allocated array, it
is this often is a net reduction of memory consumption. Consider
key+value consuming at least 12 bytes per pair. On average, we will have
twice as many buckets per item.
Thus old implementation:
2*12 = 24 bytes per item
New implementation
1*12 + 2*4 = 20 bytes per item
And the difference gets bigger with larger items.
One might think we have pulled a fast one here, as wouldn't the average size of
the new key/value arrays be 1.5 slots per items due to amortized grows?
But remember, these arrays are fully dense, and thus the accessed memory,
measured in _cache lines_, the unit which actually matters, will be the
fully used memory but just rounded up to the nearest cache line
boundary.
This has some other interesting properties, such as an insert-only
set/map will be fully ordered by insert only. Preserving this ordering
in face of deletions is more tricky tho. As we currently don't use
ordered maps, the "delete" operation maintains compactness of the item
arrays in the simplest way by breaking the ordering. It would be
possible to implement an order-preserving delete although at some cost,
like allowing the items array to become non-dense until the next rehash.
Finally, in face of these two major changes, all code used in khash.h
has been integrated into map.c and friends. Given the heavy edits it
makes no sense to "layer" the code into a vendored and a wrapper part.
Rather, the layered cake follows the specialization depth: code shared
for all maps, code specialized to a key type (and its equivalence
relation), and finally code specialized to value+key type.
Diffstat (limited to 'src/nvim/map.h')
-rw-r--r-- | src/nvim/map.h | 177 |
1 files changed, 125 insertions, 52 deletions
diff --git a/src/nvim/map.h b/src/nvim/map.h index cc32a20740..a84d533262 100644 --- a/src/nvim/map.h +++ b/src/nvim/map.h @@ -5,7 +5,6 @@ #include <stdint.h> #include <stdio.h> -#include "klib/khash.h" #include "nvim/api/private/defs.h" #include "nvim/assert.h" #include "nvim/highlight_defs.h" @@ -19,47 +18,106 @@ typedef const char *cstr_t; typedef void *ptr_t; -#define Map(T, U) Map_##T##_##U +#define Set(type) Set_##type +#define Map(T, U) Map_##T##U #define PMap(T) Map(T, ptr_t) +static const int value_init_int = 0; +static const ptr_t value_init_ptr_t = NULL; +static const ssize_t value_init_ssize_t = -1; +static const uint32_t value_init_uint32_t = 0; +static const uint64_t value_init_uint64_t = 0; +static const String value_init_String = STRING_INIT; +static const ColorItem value_init_ColorItem = COLOR_ITEM_INITIALIZER; + +// layer 0: type non-specific code + +typedef struct { + uint32_t n_buckets; + uint32_t size; + uint32_t n_occupied; + uint32_t upper_bound; + uint32_t n_keys; // this is almost always "size", but keys[] could contain ded items.. + uint32_t keys_capacity; + uint32_t *hash; +} MapHash; + +#define MAPHASH_INIT { 0, 0, 0, 0, 0, 0, NULL } +#define SET_INIT { MAPHASH_INIT, NULL } +#define MAP_INIT { SET_INIT, NULL } + +#define MH_TOMBSTONE UINT32_MAX + +#define mh_is_empty(h, i) ((h)->hash[i] == 0) +#define mh_is_del(h, i) ((h)->hash[i] == MH_TOMBSTONE) +#define mh_is_either(h, i) ((uint32_t)((h)->hash[i] + 1U) <= 1U) + +typedef enum { + kMHExisting = 0, + kMHNewKeyDidFit, + kMHNewKeyRealloc, +} MhPutStatus; + +void mh_clear(MapHash *h); +void mh_realloc(MapHash *h, uint32_t n_min_buckets); + +// layer 1: key type specific defs +// This is all need for sets. + #define KEY_DECLS(T) \ - KHASH_DECLARE(T) \ + typedef struct { \ + MapHash h; \ + T *keys; \ + } Set(T); \ + \ + uint32_t mh_find_bucket_##T(Set(T) *set, T key, bool put); \ + uint32_t mh_get_##T(Set(T) *set, T key); \ + void mh_rehash_##T(Set(T) *set); \ + uint32_t mh_put_##T(Set(T) *set, T key, MhPutStatus *new); \ + uint32_t mh_delete_##T(Set(T) *set, T *key); \ + \ static inline bool set_put_##T(Set(T) *set, T key, T **key_alloc) { \ - int kh_ret; \ - khiter_t k = kh_put(T, set, key, &kh_ret, 0); \ + MhPutStatus status; \ + uint32_t k = mh_put_##T(set, key, &status); \ if (key_alloc) { \ - *key_alloc = &kh_key(set, k); \ + *key_alloc = &set->keys[k]; \ } \ - return kh_ret; \ + return status != kMHExisting; \ } \ - static inline void set_del_##T(Set(T) *set, T key) \ + static inline T set_del_##T(Set(T) *set, T key) \ { \ - khiter_t k; \ - if ((k = kh_get(T, set, key)) != kh_end(set)) { \ - kh_del(T, set, k); \ - } \ + mh_delete_##T(set, &key); \ + return key; \ } \ static inline bool set_has_##T(Set(T) *set, T key) { \ - return (kh_get(T, set, key) != kh_end(set)); \ + return mh_get_##T(set, key) != MH_TOMBSTONE; \ } \ +// layer 2: key+value specific defs +// now we finally get Maps + #define MAP_DECLS(T, U) \ typedef struct { \ - khash_t(T) table; \ + Set(T) set; \ + U *values; \ } Map(T, U); \ - U map_##T##_##U##_get(Map(T, U) *map, T key); \ - static inline bool map_##T##_##U##_has(Map(T, U) *map, T key) \ + static inline U map_get_##T##U(Map(T, U) *map, T key) \ { \ - return kh_get(T, &map->table, key) != kh_end(&map->table); \ + uint32_t k = mh_get_##T(&map->set, key); \ + return k == MH_TOMBSTONE ? value_init_##U : map->values[k]; \ } \ - U map_##T##_##U##_put(Map(T, U) *map, T key, U value); \ - U *map_##T##_##U##_ref(Map(T, U) *map, T key, T **key_alloc); \ - U *map_##T##_##U##_put_ref(Map(T, U) *map, T key, T **key_alloc, bool *new_item); \ - U map_##T##_##U##_del(Map(T, U) *map, T key, T *key_alloc); \ + U *map_ref_##T##U(Map(T, U) *map, T key, T **key_alloc); \ + U *map_put_ref_##T##U(Map(T, U) *map, T key, T **key_alloc, bool *new_item); \ + static inline void map_put_##T##U(Map(T, U) *map, T key, U value) \ + { \ + U *val = map_put_ref_##T##U(map, key, NULL, NULL); \ + *val = value; \ + } \ + U map_del_##T##U(Map(T, U) *map, T key, T *key_alloc); \ -// NOTE: Keys AND values must be allocated! khash.h does not make a copy. +// NOTE: Keys AND values must be allocated! Map and Set does not make a copy. -#define Set(type) khash_t(type) +#define quasiquote(x, y) x##y KEY_DECLS(int) KEY_DECLS(cstr_t) @@ -72,7 +130,6 @@ KEY_DECLS(ColorKey) MAP_DECLS(int, int) MAP_DECLS(int, ptr_t) -MAP_DECLS(int, cstr_t) MAP_DECLS(cstr_t, ptr_t) MAP_DECLS(cstr_t, int) MAP_DECLS(ptr_t, ptr_t) @@ -82,49 +139,65 @@ MAP_DECLS(uint64_t, ssize_t) MAP_DECLS(uint64_t, uint64_t) MAP_DECLS(uint32_t, uint32_t) MAP_DECLS(HlEntry, int) -MAP_DECLS(String, handle_T) MAP_DECLS(String, int) MAP_DECLS(int, String) MAP_DECLS(ColorKey, ColorItem) -#define SET_INIT { 0, 0, 0, 0, NULL, NULL, NULL } -#define MAP_INIT { SET_INIT } - -#define map_get(T, U) map_##T##_##U##_get -#define map_has(T, U) map_##T##_##U##_has -#define map_put(T, U) map_##T##_##U##_put -#define map_ref(T, U) map_##T##_##U##_ref -#define map_put_ref(T, U) map_##T##_##U##_put_ref -#define map_del(T, U) map_##T##_##U##_del -#define map_destroy(T, map) kh_dealloc(T, &(map)->table) -#define map_clear(T, map) kh_clear(T, &(map)->table) - -#define map_size(map) ((map)->table.size) +#define set_has(T, set, key) set_has_##T(set, key) +#define set_put(T, set, key) set_put_##T(set, key, NULL) +#define set_put_ref(T, set, key, key_alloc) set_put_##T(set, key, key_alloc) +#define set_del(T, set, key) set_del_##T(set, key) +#define set_destroy(T, set) (xfree((set)->keys), xfree((set)->h.hash)) +#define set_clear(T, set) mh_clear(&(set)->h) +#define set_size(set) ((set)->h.size) + +#define map_get(T, U) map_get_##T##U +#define map_has(T, map, key) set_has(T, &(map)->set, key) +#define map_put(T, U) map_put_##T##U +#define map_ref(T, U) map_ref_##T##U +#define map_put_ref(T, U) map_put_ref_##T##U +#define map_del(T, U) map_del_##T##U +#define map_destroy(T, map) (set_destroy(T, &(map)->set), xfree((map)->values)) +#define map_clear(T, map) set_clear(T, &(map)->set) +#define map_size(map) set_size(&(map)->set) #define pmap_get(T) map_get(T, ptr_t) -#define pmap_has(T) map_has(T, ptr_t) #define pmap_put(T) map_put(T, ptr_t) #define pmap_ref(T) map_ref(T, ptr_t) #define pmap_put_ref(T) map_put_ref(T, ptr_t) /// @see pmap_del2 #define pmap_del(T) map_del(T, ptr_t) -#define map_foreach(U, map, key, value, block) kh_foreach(U, &(map)->table, key, value, block) +#define set_foreach(set, key, block) \ + { \ + uint32_t __i; \ + for (__i = 0; __i < (set)->h.n_keys; __i++) { \ + (key) = (set)->keys[__i]; \ + block; \ + } \ + } -#define map_foreach_value(U, map, value, block) kh_foreach_value(U, &(map)->table, value, block) -#define map_foreach_key(map, key, block) kh_foreach_key(&(map)->table, key, block) -#define set_foreach(set, key, block) kh_foreach_key(set, key, block) +#define map_foreach_key(map, key, block) set_foreach(&(map)->set, key, block) + +#define map_foreach(map, key, value, code) \ + { \ + uint32_t __i; \ + for (__i = 0; __i < (map)->set.h.n_keys; __i++) { \ + (key) = (map)->set.keys[__i]; \ + (value) = (map)->values[__i]; \ + code; \ + } \ + } -#define pmap_foreach_value(map, value, block) map_foreach_value(ptr_t, map, value, block) -#define pmap_foreach(map, key, value, block) map_foreach(ptr_t, map, key, value, block) +#define map_foreach_value(map, value, code) \ + { \ + uint32_t __i; \ + for (__i = 0; __i < (map)->set.h.n_keys; __i++) { \ + (value) = (map)->values[__i]; \ + code; \ + } \ + } void pmap_del2(PMap(cstr_t) *map, const char *key); -#define set_has(T, set, key) set_has_##T(set, key) -#define set_put(T, set, key) set_put_##T(set, key, NULL) -#define set_put_ref(T, set, key, key_alloc) set_put_##T(set, key, key_alloc) -#define set_del(T, set, key) set_del_##T(set, key) -#define set_destroy(T, set) kh_dealloc(T, set) -#define set_clear(T, set) kh_clear(T, set) - #endif // NVIM_MAP_H |