diff options
author | bfredl <bjorn.linse@gmail.com> | 2024-07-02 13:45:50 +0200 |
---|---|---|
committer | bfredl <bjorn.linse@gmail.com> | 2024-08-05 11:12:44 +0200 |
commit | f926cc32c9262b6254e2843276b951cef9da1afe (patch) | |
tree | 56f13240abae6ec0f3b13022b011da84948788c0 /src/nvim/eval/decode.c | |
parent | 0c2860d9e5ec5417a94db6e3edd237578b76d418 (diff) | |
download | rneovim-f926cc32c9262b6254e2843276b951cef9da1afe.tar.gz rneovim-f926cc32c9262b6254e2843276b951cef9da1afe.tar.bz2 rneovim-f926cc32c9262b6254e2843276b951cef9da1afe.zip |
refactor(shada): rework msgpack decoding without msgpack-c
This also makes shada reading slightly faster due to avoiding
some copying and allocation.
Use keysets to drive decoding of msgpack maps for shada entries.
Diffstat (limited to 'src/nvim/eval/decode.c')
-rw-r--r-- | src/nvim/eval/decode.c | 307 |
1 file changed, 192 insertions, 115 deletions
diff --git a/src/nvim/eval/decode.c b/src/nvim/eval/decode.c index 13cd3274dd..1ff8716763 100644 --- a/src/nvim/eval/decode.c +++ b/src/nvim/eval/decode.c @@ -1,5 +1,4 @@ #include <assert.h> -#include <msgpack/object.h> #include <stdbool.h> #include <stddef.h> #include <stdint.h> @@ -7,6 +6,7 @@ #include <string.h> #include "klib/kvec.h" +#include "mpack/object.h" #include "nvim/ascii_defs.h" #include "nvim/charset.h" #include "nvim/eval.h" @@ -885,173 +885,250 @@ json_decode_string_ret: #undef DICT_LEN -/// Convert msgpack object to a Vimscript one -int msgpack_to_vim(const msgpack_object mobj, typval_T *const rettv) - FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT +static void positive_integer_to_special_typval(typval_T *rettv, uint64_t val) { - switch (mobj.type) { - case MSGPACK_OBJECT_NIL: + if (val <= VARNUMBER_MAX) { *rettv = (typval_T) { + .v_type = VAR_NUMBER, + .v_lock = VAR_UNLOCKED, + .vval = { .v_number = (varnumber_T)val }, + }; + } else { + list_T *const list = tv_list_alloc(4); + tv_list_ref(list); + create_special_dict(rettv, kMPInteger, ((typval_T) { + .v_type = VAR_LIST, + .v_lock = VAR_UNLOCKED, + .vval = { .v_list = list }, + })); + tv_list_append_number(list, 1); + tv_list_append_number(list, (varnumber_T)((val >> 62) & 0x3)); + tv_list_append_number(list, (varnumber_T)((val >> 31) & 0x7FFFFFFF)); + tv_list_append_number(list, (varnumber_T)(val & 0x7FFFFFFF)); + } +} + +static void typval_parse_enter(mpack_parser_t *parser, mpack_node_t *node) +{ + typval_T *result = NULL; + + mpack_node_t *parent = MPACK_PARENT_NODE(node); + if (parent) { + switch (parent->tok.type) { + case MPACK_TOKEN_ARRAY: { + list_T *list = parent->data[1].p; + result = tv_list_append_owned_tv(list, (typval_T) { .v_type = VAR_UNKNOWN }); + break; + } + case MPACK_TOKEN_MAP: { + typval_T(*items)[2] = parent->data[1].p; + result = &items[parent->pos][parent->key_visited]; + break; + } + + case MPACK_TOKEN_STR: + case MPACK_TOKEN_BIN: + case MPACK_TOKEN_EXT: + 
assert(node->tok.type == MPACK_TOKEN_CHUNK); + break; + + default: + abort(); + } + } else { + result = parser->data.p; + } + + // for types that are completed in typval_parse_exit + node->data[0].p = result; + node->data[1].p = NULL; // free on error if non-NULL + + switch (node->tok.type) { + case MPACK_TOKEN_NIL: + *result = (typval_T) { .v_type = VAR_SPECIAL, .v_lock = VAR_UNLOCKED, .vval = { .v_special = kSpecialVarNull }, }; break; - case MSGPACK_OBJECT_BOOLEAN: - *rettv = (typval_T) { + case MPACK_TOKEN_BOOLEAN: + *result = (typval_T) { .v_type = VAR_BOOL, .v_lock = VAR_UNLOCKED, .vval = { - .v_bool = mobj.via.boolean ? kBoolVarTrue : kBoolVarFalse + .v_bool = mpack_unpack_boolean(node->tok) ? kBoolVarTrue : kBoolVarFalse }, }; break; - case MSGPACK_OBJECT_POSITIVE_INTEGER: - if (mobj.via.u64 <= VARNUMBER_MAX) { - *rettv = (typval_T) { - .v_type = VAR_NUMBER, - .v_lock = VAR_UNLOCKED, - .vval = { .v_number = (varnumber_T)mobj.via.u64 }, - }; - } else { - list_T *const list = tv_list_alloc(4); - tv_list_ref(list); - create_special_dict(rettv, kMPInteger, ((typval_T) { - .v_type = VAR_LIST, - .v_lock = VAR_UNLOCKED, - .vval = { .v_list = list }, - })); - uint64_t n = mobj.via.u64; - tv_list_append_number(list, 1); - tv_list_append_number(list, (varnumber_T)((n >> 62) & 0x3)); - tv_list_append_number(list, (varnumber_T)((n >> 31) & 0x7FFFFFFF)); - tv_list_append_number(list, (varnumber_T)(n & 0x7FFFFFFF)); - } + case MPACK_TOKEN_SINT: { + *result = (typval_T) { + .v_type = VAR_NUMBER, + .v_lock = VAR_UNLOCKED, + .vval = { .v_number = mpack_unpack_sint(node->tok) }, + }; break; - case MSGPACK_OBJECT_NEGATIVE_INTEGER: - if (mobj.via.i64 >= VARNUMBER_MIN) { - *rettv = (typval_T) { - .v_type = VAR_NUMBER, - .v_lock = VAR_UNLOCKED, - .vval = { .v_number = (varnumber_T)mobj.via.i64 }, - }; - } else { - list_T *const list = tv_list_alloc(4); - tv_list_ref(list); - create_special_dict(rettv, kMPInteger, ((typval_T) { - .v_type = VAR_LIST, - .v_lock = VAR_UNLOCKED, - 
.vval = { .v_list = list }, - })); - uint64_t n = -((uint64_t)mobj.via.i64); - tv_list_append_number(list, -1); - tv_list_append_number(list, (varnumber_T)((n >> 62) & 0x3)); - tv_list_append_number(list, (varnumber_T)((n >> 31) & 0x7FFFFFFF)); - tv_list_append_number(list, (varnumber_T)(n & 0x7FFFFFFF)); - } + } + case MPACK_TOKEN_UINT: + positive_integer_to_special_typval(result, mpack_unpack_uint(node->tok)); break; - case MSGPACK_OBJECT_FLOAT32: - case MSGPACK_OBJECT_FLOAT64: - *rettv = (typval_T) { + case MPACK_TOKEN_FLOAT: + *result = (typval_T) { .v_type = VAR_FLOAT, .v_lock = VAR_UNLOCKED, - .vval = { .v_float = mobj.via.f64 }, + .vval = { .v_float = mpack_unpack_float(node->tok) }, }; break; - case MSGPACK_OBJECT_STR: - case MSGPACK_OBJECT_BIN: - *rettv = decode_string(mobj.via.bin.ptr, mobj.via.bin.size, false, false); + + case MPACK_TOKEN_BIN: + case MPACK_TOKEN_STR: + case MPACK_TOKEN_EXT: + // actually converted in typval_parse_exit after the data chunks + node->data[1].p = xmallocz(node->tok.length); break; - case MSGPACK_OBJECT_ARRAY: { - list_T *const list = tv_list_alloc((ptrdiff_t)mobj.via.array.size); + case MPACK_TOKEN_CHUNK: { + char *data = parent->data[1].p; + memcpy(data + parent->pos, + node->tok.data.chunk_ptr, node->tok.length); + break; + } + + case MPACK_TOKEN_ARRAY: { + list_T *const list = tv_list_alloc((ptrdiff_t)node->tok.length); tv_list_ref(list); - *rettv = (typval_T) { + *result = (typval_T) { .v_type = VAR_LIST, .v_lock = VAR_UNLOCKED, .vval = { .v_list = list }, }; - for (size_t i = 0; i < mobj.via.array.size; i++) { - // Not populated yet, need to create list item to push. 
- tv_list_append_owned_tv(list, (typval_T) { .v_type = VAR_UNKNOWN }); - if (msgpack_to_vim(mobj.via.array.ptr[i], - TV_LIST_ITEM_TV(tv_list_last(list))) - == FAIL) { - return FAIL; - } + node->data[1].p = list; + break; + } + case MPACK_TOKEN_MAP: + // we don't know if this will be safe to convert to a typval dict yet + node->data[1].p = xmallocz(node->tok.length * 2 * sizeof(typval_T)); + break; + } +} + +/// free node which was entered but never exited, due to a nested error +/// +/// Don't bother with typvals as these will be GC:d eventually +void typval_parser_error_free(mpack_parser_t *parser) +{ + for (uint32_t i = 0; i < parser->size; i++) { + mpack_node_t *node = &parser->items[i]; + switch (node->tok.type) { + case MPACK_TOKEN_BIN: + case MPACK_TOKEN_STR: + case MPACK_TOKEN_EXT: + case MPACK_TOKEN_MAP: + XFREE_CLEAR(node->data[1].p); + break; + default: + break; } + } +} + +static void typval_parse_exit(mpack_parser_t *parser, mpack_node_t *node) +{ + typval_T *result = node->data[0].p; + switch (node->tok.type) { + case MPACK_TOKEN_BIN: + case MPACK_TOKEN_STR: + *result = decode_string(node->data[1].p, node->tok.length, false, true); + node->data[1].p = NULL; break; + + case MPACK_TOKEN_EXT: { + list_T *const list = tv_list_alloc(2); + tv_list_ref(list); + tv_list_append_number(list, node->tok.data.ext_type); + list_T *const ext_val_list = tv_list_alloc(kListLenMayKnow); + tv_list_append_list(list, ext_val_list); + create_special_dict(result, kMPExt, ((typval_T) { .v_type = VAR_LIST, + .v_lock = VAR_UNLOCKED, + .vval = { .v_list = list } })); + // TODO(bfredl): why not use BLOB? 
+ encode_list_write((void *)ext_val_list, node->data[1].p, node->tok.length); + XFREE_CLEAR(node->data[1].p); } - case MSGPACK_OBJECT_MAP: { - for (size_t i = 0; i < mobj.via.map.size; i++) { - if ((mobj.via.map.ptr[i].key.type != MSGPACK_OBJECT_STR - && mobj.via.map.ptr[i].key.type != MSGPACK_OBJECT_BIN) - || mobj.via.map.ptr[i].key.via.str.size == 0 - || memchr(mobj.via.map.ptr[i].key.via.str.ptr, NUL, - mobj.via.map.ptr[i].key.via.str.size) != NULL) { + break; + + case MPACK_TOKEN_MAP: { + typval_T(*items)[2] = node->data[1].p; + for (size_t i = 0; i < node->tok.length; i++) { + typval_T *key = &items[i][0]; + if (key->v_type != VAR_STRING + || key->vval.v_string == NULL + || key->vval.v_string[0] == NUL) { goto msgpack_to_vim_generic_map; } } dict_T *const dict = tv_dict_alloc(); dict->dv_refcount++; - *rettv = (typval_T) { + *result = (typval_T) { .v_type = VAR_DICT, .v_lock = VAR_UNLOCKED, .vval = { .v_dict = dict }, }; - for (size_t i = 0; i < mobj.via.map.size; i++) { - dictitem_T *const di = xmallocz(offsetof(dictitem_T, di_key) - + mobj.via.map.ptr[i].key.via.str.size); - memcpy(&di->di_key[0], mobj.via.map.ptr[i].key.via.str.ptr, - mobj.via.map.ptr[i].key.via.str.size); + for (size_t i = 0; i < node->tok.length; i++) { + char *key = items[i][0].vval.v_string; + size_t keylen = strlen(key); + dictitem_T *const di = xmallocz(offsetof(dictitem_T, di_key) + keylen); + memcpy(&di->di_key[0], key, keylen); di->di_tv.v_type = VAR_UNKNOWN; if (tv_dict_add(dict, di) == FAIL) { // Duplicate key: fallback to generic map - tv_clear(rettv); + TV_DICT_ITER(dict, d, { + d->di_tv.v_type = VAR_SPECIAL; // don't free values in tv_clear(), they will be reused + d->di_tv.vval.v_special = kSpecialVarNull; + }); + tv_clear(result); xfree(di); goto msgpack_to_vim_generic_map; } - if (msgpack_to_vim(mobj.via.map.ptr[i].val, &di->di_tv) == FAIL) { - return FAIL; - } + di->di_tv = items[i][1]; + } + for (size_t i = 0; i < node->tok.length; i++) { + 
xfree(items[i][0].vval.v_string); } + XFREE_CLEAR(node->data[1].p); break; msgpack_to_vim_generic_map: {} - list_T *const list = decode_create_map_special_dict(rettv, (ptrdiff_t)mobj.via.map.size); - for (size_t i = 0; i < mobj.via.map.size; i++) { + list_T *const list = decode_create_map_special_dict(result, node->tok.length); + for (size_t i = 0; i < node->tok.length; i++) { list_T *const kv_pair = tv_list_alloc(2); tv_list_append_list(list, kv_pair); - typval_T key_tv = { .v_type = VAR_UNKNOWN }; - if (msgpack_to_vim(mobj.via.map.ptr[i].key, &key_tv) == FAIL) { - tv_clear(&key_tv); - return FAIL; - } - tv_list_append_owned_tv(kv_pair, key_tv); - - typval_T val_tv = { .v_type = VAR_UNKNOWN }; - if (msgpack_to_vim(mobj.via.map.ptr[i].val, &val_tv) == FAIL) { - tv_clear(&val_tv); - return FAIL; - } - tv_list_append_owned_tv(kv_pair, val_tv); + tv_list_append_owned_tv(kv_pair, items[i][0]); + tv_list_append_owned_tv(kv_pair, items[i][1]); } + XFREE_CLEAR(node->data[1].p); break; } - case MSGPACK_OBJECT_EXT: { - list_T *const list = tv_list_alloc(2); - tv_list_ref(list); - tv_list_append_number(list, mobj.via.ext.type); - list_T *const ext_val_list = tv_list_alloc(kListLenMayKnow); - tv_list_append_list(list, ext_val_list); - create_special_dict(rettv, kMPExt, ((typval_T) { .v_type = VAR_LIST, - .v_lock = VAR_UNLOCKED, - .vval = { .v_list = list } })); - if (encode_list_write((void *)ext_val_list, mobj.via.ext.ptr, - mobj.via.ext.size) == -1) { - return FAIL; - } + + default: + // other kinds are handled completely in typval_parse_enter, break; } +} + +int mpack_parse_typval(mpack_parser_t *parser, const char **data, size_t *size) +{ + return mpack_parse(parser, data, size, typval_parse_enter, typval_parse_exit); +} + +int unpack_typval(const char **data, size_t *size, typval_T *ret) +{ + ret->v_type = VAR_UNKNOWN; + mpack_parser_t parser; + mpack_parser_init(&parser, 0); + parser.data.p = ret; + int status = mpack_parse_typval(&parser, data, size); + if (status != 
MPACK_OK) { + typval_parser_error_free(&parser); + tv_clear(ret); } - return OK; + return status; } |