diff options
author | bfredl <bjorn.linse@gmail.com> | 2024-07-02 13:45:50 +0200 |
---|---|---|
committer | bfredl <bjorn.linse@gmail.com> | 2024-08-05 11:12:44 +0200 |
commit | f926cc32c9262b6254e2843276b951cef9da1afe (patch) | |
tree | 56f13240abae6ec0f3b13022b011da84948788c0 /src/nvim/eval | |
parent | 0c2860d9e5ec5417a94db6e3edd237578b76d418 (diff) | |
download | rneovim-f926cc32c9262b6254e2843276b951cef9da1afe.tar.gz rneovim-f926cc32c9262b6254e2843276b951cef9da1afe.tar.bz2 rneovim-f926cc32c9262b6254e2843276b951cef9da1afe.zip |
refactor(shada): rework msgpack decoding without msgpack-c
This also makes shada reading slightly faster due to avoiding
some copying and allocation.
Use keysets to drive decoding of msgpack maps for shada entries.
Diffstat (limited to 'src/nvim/eval')
-rw-r--r-- | src/nvim/eval/decode.c | 307 | ||||
-rw-r--r-- | src/nvim/eval/decode.h | 2 | ||||
-rw-r--r-- | src/nvim/eval/encode.c | 5 | ||||
-rw-r--r-- | src/nvim/eval/encode.h | 1 | ||||
-rw-r--r-- | src/nvim/eval/funcs.c | 128 | ||||
-rw-r--r-- | src/nvim/eval/typval.c | 5 |
6 files changed, 260 insertions, 188 deletions
diff --git a/src/nvim/eval/decode.c b/src/nvim/eval/decode.c index 13cd3274dd..1ff8716763 100644 --- a/src/nvim/eval/decode.c +++ b/src/nvim/eval/decode.c @@ -1,5 +1,4 @@ #include <assert.h> -#include <msgpack/object.h> #include <stdbool.h> #include <stddef.h> #include <stdint.h> @@ -7,6 +6,7 @@ #include <string.h> #include "klib/kvec.h" +#include "mpack/object.h" #include "nvim/ascii_defs.h" #include "nvim/charset.h" #include "nvim/eval.h" @@ -885,173 +885,250 @@ json_decode_string_ret: #undef DICT_LEN -/// Convert msgpack object to a Vimscript one -int msgpack_to_vim(const msgpack_object mobj, typval_T *const rettv) - FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT +static void positive_integer_to_special_typval(typval_T *rettv, uint64_t val) { - switch (mobj.type) { - case MSGPACK_OBJECT_NIL: + if (val <= VARNUMBER_MAX) { *rettv = (typval_T) { + .v_type = VAR_NUMBER, + .v_lock = VAR_UNLOCKED, + .vval = { .v_number = (varnumber_T)val }, + }; + } else { + list_T *const list = tv_list_alloc(4); + tv_list_ref(list); + create_special_dict(rettv, kMPInteger, ((typval_T) { + .v_type = VAR_LIST, + .v_lock = VAR_UNLOCKED, + .vval = { .v_list = list }, + })); + tv_list_append_number(list, 1); + tv_list_append_number(list, (varnumber_T)((val >> 62) & 0x3)); + tv_list_append_number(list, (varnumber_T)((val >> 31) & 0x7FFFFFFF)); + tv_list_append_number(list, (varnumber_T)(val & 0x7FFFFFFF)); + } +} + +static void typval_parse_enter(mpack_parser_t *parser, mpack_node_t *node) +{ + typval_T *result = NULL; + + mpack_node_t *parent = MPACK_PARENT_NODE(node); + if (parent) { + switch (parent->tok.type) { + case MPACK_TOKEN_ARRAY: { + list_T *list = parent->data[1].p; + result = tv_list_append_owned_tv(list, (typval_T) { .v_type = VAR_UNKNOWN }); + break; + } + case MPACK_TOKEN_MAP: { + typval_T(*items)[2] = parent->data[1].p; + result = &items[parent->pos][parent->key_visited]; + break; + } + + case MPACK_TOKEN_STR: + case MPACK_TOKEN_BIN: + case MPACK_TOKEN_EXT: + assert(node->tok.type == MPACK_TOKEN_CHUNK); + break; + + default: + abort(); + } + } else { + result = parser->data.p; + } + + // for types that are completed in typval_parse_exit + node->data[0].p = result; + node->data[1].p = NULL; // free on error if non-NULL + + switch (node->tok.type) { + case MPACK_TOKEN_NIL: + *result = (typval_T) { .v_type = VAR_SPECIAL, .v_lock = VAR_UNLOCKED, .vval = { .v_special = kSpecialVarNull }, }; break; - case MSGPACK_OBJECT_BOOLEAN: - *rettv = (typval_T) { + case MPACK_TOKEN_BOOLEAN: + *result = (typval_T) { .v_type = VAR_BOOL, .v_lock = VAR_UNLOCKED, .vval = { - .v_bool = mobj.via.boolean ? kBoolVarTrue : kBoolVarFalse + .v_bool = mpack_unpack_boolean(node->tok) ? kBoolVarTrue : kBoolVarFalse }, }; break; - case MSGPACK_OBJECT_POSITIVE_INTEGER: - if (mobj.via.u64 <= VARNUMBER_MAX) { - *rettv = (typval_T) { - .v_type = VAR_NUMBER, - .v_lock = VAR_UNLOCKED, - .vval = { .v_number = (varnumber_T)mobj.via.u64 }, - }; - } else { - list_T *const list = tv_list_alloc(4); - tv_list_ref(list); - create_special_dict(rettv, kMPInteger, ((typval_T) { - .v_type = VAR_LIST, - .v_lock = VAR_UNLOCKED, - .vval = { .v_list = list }, - })); - uint64_t n = mobj.via.u64; - tv_list_append_number(list, 1); - tv_list_append_number(list, (varnumber_T)((n >> 62) & 0x3)); - tv_list_append_number(list, (varnumber_T)((n >> 31) & 0x7FFFFFFF)); - tv_list_append_number(list, (varnumber_T)(n & 0x7FFFFFFF)); - } + case MPACK_TOKEN_SINT: { + *result = (typval_T) { + .v_type = VAR_NUMBER, + .v_lock = VAR_UNLOCKED, + .vval = { .v_number = mpack_unpack_sint(node->tok) }, + }; break; - case MSGPACK_OBJECT_NEGATIVE_INTEGER: - if (mobj.via.i64 >= VARNUMBER_MIN) { - *rettv = (typval_T) { - .v_type = VAR_NUMBER, - .v_lock = VAR_UNLOCKED, - .vval = { .v_number = (varnumber_T)mobj.via.i64 }, - }; - } else { - list_T *const list = tv_list_alloc(4); - tv_list_ref(list); - create_special_dict(rettv, kMPInteger, ((typval_T) { - .v_type = VAR_LIST, - .v_lock = VAR_UNLOCKED, - .vval = { .v_list = list }, - })); - uint64_t n = -((uint64_t)mobj.via.i64); - tv_list_append_number(list, -1); - tv_list_append_number(list, (varnumber_T)((n >> 62) & 0x3)); - tv_list_append_number(list, (varnumber_T)((n >> 31) & 0x7FFFFFFF)); - tv_list_append_number(list, (varnumber_T)(n & 0x7FFFFFFF)); - } + } + case MPACK_TOKEN_UINT: + positive_integer_to_special_typval(result, mpack_unpack_uint(node->tok)); break; - case MSGPACK_OBJECT_FLOAT32: - case MSGPACK_OBJECT_FLOAT64: - *rettv = (typval_T) { + case MPACK_TOKEN_FLOAT: + *result = (typval_T) { .v_type = VAR_FLOAT, .v_lock = VAR_UNLOCKED, - .vval = { .v_float = mobj.via.f64 }, + .vval = { .v_float = mpack_unpack_float(node->tok) }, }; break; - case MSGPACK_OBJECT_STR: - case MSGPACK_OBJECT_BIN: - *rettv = decode_string(mobj.via.bin.ptr, mobj.via.bin.size, false, false); + + case MPACK_TOKEN_BIN: + case MPACK_TOKEN_STR: + case MPACK_TOKEN_EXT: + // actually converted in typval_parse_exit after the data chunks + node->data[1].p = xmallocz(node->tok.length); break; - case MSGPACK_OBJECT_ARRAY: { - list_T *const list = tv_list_alloc((ptrdiff_t)mobj.via.array.size); + case MPACK_TOKEN_CHUNK: { + char *data = parent->data[1].p; + memcpy(data + parent->pos, + node->tok.data.chunk_ptr, node->tok.length); + break; + } + + case MPACK_TOKEN_ARRAY: { + list_T *const list = tv_list_alloc((ptrdiff_t)node->tok.length); tv_list_ref(list); - *rettv = (typval_T) { + *result = (typval_T) { .v_type = VAR_LIST, .v_lock = VAR_UNLOCKED, .vval = { .v_list = list }, }; - for (size_t i = 0; i < mobj.via.array.size; i++) { - // Not populated yet, need to create list item to push. - tv_list_append_owned_tv(list, (typval_T) { .v_type = VAR_UNKNOWN }); - if (msgpack_to_vim(mobj.via.array.ptr[i], - TV_LIST_ITEM_TV(tv_list_last(list))) - == FAIL) { - return FAIL; - } + node->data[1].p = list; + break; + } + case MPACK_TOKEN_MAP: + // we don't know if this will be safe to convert to a typval dict yet + node->data[1].p = xmallocz(node->tok.length * 2 * sizeof(typval_T)); + break; + } +} + +/// free node which was entered but never exited, due to a nested error +/// +/// Don't bother with typvals as these will be GC:d eventually +void typval_parser_error_free(mpack_parser_t *parser) +{ + for (uint32_t i = 0; i < parser->size; i++) { + mpack_node_t *node = &parser->items[i]; + switch (node->tok.type) { + case MPACK_TOKEN_BIN: + case MPACK_TOKEN_STR: + case MPACK_TOKEN_EXT: + case MPACK_TOKEN_MAP: + XFREE_CLEAR(node->data[1].p); + break; + default: + break; } + } +} + +static void typval_parse_exit(mpack_parser_t *parser, mpack_node_t *node) +{ + typval_T *result = node->data[0].p; + switch (node->tok.type) { + case MPACK_TOKEN_BIN: + case MPACK_TOKEN_STR: + *result = decode_string(node->data[1].p, node->tok.length, false, true); + node->data[1].p = NULL; break; + + case MPACK_TOKEN_EXT: { + list_T *const list = tv_list_alloc(2); + tv_list_ref(list); + tv_list_append_number(list, node->tok.data.ext_type); + list_T *const ext_val_list = tv_list_alloc(kListLenMayKnow); + tv_list_append_list(list, ext_val_list); + create_special_dict(result, kMPExt, ((typval_T) { .v_type = VAR_LIST, + .v_lock = VAR_UNLOCKED, + .vval = { .v_list = list } })); + // TODO(bfredl): why not use BLOB? + encode_list_write((void *)ext_val_list, node->data[1].p, node->tok.length); + XFREE_CLEAR(node->data[1].p); } - case MSGPACK_OBJECT_MAP: { - for (size_t i = 0; i < mobj.via.map.size; i++) { - if ((mobj.via.map.ptr[i].key.type != MSGPACK_OBJECT_STR - && mobj.via.map.ptr[i].key.type != MSGPACK_OBJECT_BIN) - || mobj.via.map.ptr[i].key.via.str.size == 0 - || memchr(mobj.via.map.ptr[i].key.via.str.ptr, NUL, - mobj.via.map.ptr[i].key.via.str.size) != NULL) { + break; + + case MPACK_TOKEN_MAP: { + typval_T(*items)[2] = node->data[1].p; + for (size_t i = 0; i < node->tok.length; i++) { + typval_T *key = &items[i][0]; + if (key->v_type != VAR_STRING + || key->vval.v_string == NULL + || key->vval.v_string[0] == NUL) { goto msgpack_to_vim_generic_map; } } dict_T *const dict = tv_dict_alloc(); dict->dv_refcount++; - *rettv = (typval_T) { + *result = (typval_T) { .v_type = VAR_DICT, .v_lock = VAR_UNLOCKED, .vval = { .v_dict = dict }, }; - for (size_t i = 0; i < mobj.via.map.size; i++) { - dictitem_T *const di = xmallocz(offsetof(dictitem_T, di_key) - + mobj.via.map.ptr[i].key.via.str.size); - memcpy(&di->di_key[0], mobj.via.map.ptr[i].key.via.str.ptr, - mobj.via.map.ptr[i].key.via.str.size); + for (size_t i = 0; i < node->tok.length; i++) { + char *key = items[i][0].vval.v_string; + size_t keylen = strlen(key); + dictitem_T *const di = xmallocz(offsetof(dictitem_T, di_key) + keylen); + memcpy(&di->di_key[0], key, keylen); di->di_tv.v_type = VAR_UNKNOWN; if (tv_dict_add(dict, di) == FAIL) { // Duplicate key: fallback to generic map - tv_clear(rettv); + TV_DICT_ITER(dict, d, { + d->di_tv.v_type = VAR_SPECIAL; // don't free values in tv_clear(), they will be reused + d->di_tv.vval.v_special = kSpecialVarNull; + }); + tv_clear(result); xfree(di); goto msgpack_to_vim_generic_map; } - if (msgpack_to_vim(mobj.via.map.ptr[i].val, &di->di_tv) == FAIL) { - return FAIL; - } + di->di_tv = items[i][1]; + } + for (size_t i = 0; i < node->tok.length; i++) { + xfree(items[i][0].vval.v_string); } + XFREE_CLEAR(node->data[1].p); break; msgpack_to_vim_generic_map: {} - list_T *const list = decode_create_map_special_dict(rettv, (ptrdiff_t)mobj.via.map.size); - for (size_t i = 0; i < mobj.via.map.size; i++) { + list_T *const list = decode_create_map_special_dict(result, node->tok.length); + for (size_t i = 0; i < node->tok.length; i++) { list_T *const kv_pair = tv_list_alloc(2); tv_list_append_list(list, kv_pair); - typval_T key_tv = { .v_type = VAR_UNKNOWN }; - if (msgpack_to_vim(mobj.via.map.ptr[i].key, &key_tv) == FAIL) { - tv_clear(&key_tv); - return FAIL; - } - tv_list_append_owned_tv(kv_pair, key_tv); - - typval_T val_tv = { .v_type = VAR_UNKNOWN }; - if (msgpack_to_vim(mobj.via.map.ptr[i].val, &val_tv) == FAIL) { - tv_clear(&val_tv); - return FAIL; - } - tv_list_append_owned_tv(kv_pair, val_tv); + tv_list_append_owned_tv(kv_pair, items[i][0]); + tv_list_append_owned_tv(kv_pair, items[i][1]); } + XFREE_CLEAR(node->data[1].p); break; } - case MSGPACK_OBJECT_EXT: { - list_T *const list = tv_list_alloc(2); - tv_list_ref(list); - tv_list_append_number(list, mobj.via.ext.type); - list_T *const ext_val_list = tv_list_alloc(kListLenMayKnow); - tv_list_append_list(list, ext_val_list); - create_special_dict(rettv, kMPExt, ((typval_T) { .v_type = VAR_LIST, - .v_lock = VAR_UNLOCKED, - .vval = { .v_list = list } })); - if (encode_list_write((void *)ext_val_list, mobj.via.ext.ptr, - mobj.via.ext.size) == -1) { - return FAIL; - } + + default: + // other kinds are handled completely in typval_parse_enter, break; } +} + +int mpack_parse_typval(mpack_parser_t *parser, const char **data, size_t *size) +{ + return mpack_parse(parser, data, size, typval_parse_enter, typval_parse_exit); +} + +int unpack_typval(const char **data, size_t *size, typval_T *ret) +{ + ret->v_type = VAR_UNKNOWN; + mpack_parser_t parser; + mpack_parser_init(&parser, 0); + parser.data.p = ret; + int status = mpack_parse_typval(&parser, data, size); + if (status != MPACK_OK) { + typval_parser_error_free(&parser); + tv_clear(ret); } - return OK; + return status; } diff --git a/src/nvim/eval/decode.h b/src/nvim/eval/decode.h index c0d10a469a..485cc65561 100644 --- a/src/nvim/eval/decode.h +++ b/src/nvim/eval/decode.h @@ -1,8 +1,8 @@ #pragma once -#include <msgpack.h> // IWYU pragma: keep #include <stddef.h> // IWYU pragma: keep +#include "mpack/object.h" #include "nvim/eval/typval_defs.h" // IWYU pragma: keep #include "nvim/types_defs.h" // IWYU pragma: keep diff --git a/src/nvim/eval/encode.c b/src/nvim/eval/encode.c index 92c5aaeffd..79f334601d 100644 --- a/src/nvim/eval/encode.c +++ b/src/nvim/eval/encode.c @@ -55,11 +55,11 @@ int encode_blob_write(void *const data, const char *const buf, const size_t len) } /// Msgpack callback for writing to readfile()-style list -int encode_list_write(void *const data, const char *const buf, const size_t len) +void encode_list_write(void *const data, const char *const buf, const size_t len) FUNC_ATTR_NONNULL_ARG(1) { if (len == 0) { - return 0; + return; } list_T *const list = (list_T *)data; const char *const end = buf + len; @@ -97,7 +97,6 @@ int encode_list_write(void *const data, const char *const buf, const size_t len) if (line_end == end) { tv_list_append_allocated_string(list, NULL); } - return 0; } /// Abort conversion to string after a recursion error. diff --git a/src/nvim/eval/encode.h b/src/nvim/eval/encode.h index efccfcb5a6..2bacc82b0d 100644 --- a/src/nvim/eval/encode.h +++ b/src/nvim/eval/encode.h @@ -1,6 +1,5 @@ #pragma once -#include <msgpack/pack.h> #include <string.h> #include "nvim/eval/typval_defs.h" diff --git a/src/nvim/eval/funcs.c b/src/nvim/eval/funcs.c index d2f6af4d9e..2f51532ec4 100644 --- a/src/nvim/eval/funcs.c +++ b/src/nvim/eval/funcs.c @@ -4,9 +4,6 @@ #include <inttypes.h> #include <limits.h> #include <math.h> -#include <msgpack/object.h> -#include <msgpack/pack.h> -#include <msgpack/unpack.h> #include <signal.h> #include <stdarg.h> #include <stddef.h> @@ -18,6 +15,7 @@ #include <uv.h> #include "auto/config.h" +#include "mpack/object.h" #include "nvim/api/private/converter.h" #include "nvim/api/private/defs.h" #include "nvim/api/private/dispatch.h" @@ -5723,36 +5721,24 @@ static void f_msgpackdump(typval_T *argvars, typval_T *rettv, EvalFuncData fptr) } } -static int msgpackparse_convert_item(const msgpack_object data, const msgpack_unpack_return result, - list_T *const ret_list, const bool fail_if_incomplete) - FUNC_ATTR_NONNULL_ALL +static void emsg_mpack_error(int status) { - switch (result) { - case MSGPACK_UNPACK_PARSE_ERROR: + switch (status) { + case MPACK_ERROR: semsg(_(e_invarg2), "Failed to parse msgpack string"); - return FAIL; - case MSGPACK_UNPACK_NOMEM_ERROR: - emsg(_(e_outofmem)); - return FAIL; - case MSGPACK_UNPACK_CONTINUE: - if (fail_if_incomplete) { - semsg(_(e_invarg2), "Incomplete msgpack string"); - return FAIL; - } - return NOTDONE; - case MSGPACK_UNPACK_SUCCESS: { - typval_T tv = { .v_type = VAR_UNKNOWN }; - if (msgpack_to_vim(data, &tv) == FAIL) { - semsg(_(e_invarg2), "Failed to convert msgpack string"); - return FAIL; - } - tv_list_append_owned_tv(ret_list, tv); - return OK; - } - case MSGPACK_UNPACK_EXTRA_BYTES: - abort(); + break; + + case MPACK_EOF: + semsg(_(e_invarg2), "Incomplete msgpack string"); + break; + + case MPACK_NOMEM: + semsg(_(e_invarg2), "object was too deep to unpack"); + break; + + default: + break; } - UNREACHABLE; } static void msgpackparse_unpack_list(const list_T *const list, list_T *const ret_list) @@ -5766,48 +5752,57 @@ static void msgpackparse_unpack_list(const list_T *const list, list_T *const ret return; } ListReaderState lrstate = encode_init_lrstate(list); - msgpack_unpacker *const unpacker = msgpack_unpacker_new(IOSIZE); - if (unpacker == NULL) { - emsg(_(e_outofmem)); - return; - } - msgpack_unpacked unpacked; - msgpack_unpacked_init(&unpacked); + char *buf = alloc_block(); + size_t buf_size = 0; + + typval_T cur_item = { .v_type = VAR_UNKNOWN }; + mpack_parser_t parser; + mpack_parser_init(&parser, 0); + parser.data.p = &cur_item; + + int status = MPACK_OK; while (true) { - if (!msgpack_unpacker_reserve_buffer(unpacker, IOSIZE)) { - emsg(_(e_outofmem)); - goto end; - } size_t read_bytes; - const int rlret = encode_read_from_list(&lrstate, msgpack_unpacker_buffer(unpacker), IOSIZE, + const int rlret = encode_read_from_list(&lrstate, buf + buf_size, ARENA_BLOCK_SIZE - buf_size, &read_bytes); if (rlret == FAIL) { semsg(_(e_invarg2), "List item is not a string"); goto end; } - msgpack_unpacker_buffer_consumed(unpacker, read_bytes); - if (read_bytes == 0) { - break; - } - while (unpacker->off < unpacker->used) { - const msgpack_unpack_return result - = msgpack_unpacker_next(unpacker, &unpacked); - const int conv_result = msgpackparse_convert_item(unpacked.data, result, - ret_list, rlret == OK); - if (conv_result == NOTDONE) { + buf_size += read_bytes; + + const char *ptr = buf; + while (buf_size) { + status = mpack_parse_typval(&parser, &ptr, &buf_size); + if (status == MPACK_OK) { + tv_list_append_owned_tv(ret_list, cur_item); + cur_item.v_type = VAR_UNKNOWN; + } else { break; - } else if (conv_result == FAIL) { - goto end; } } + if (rlret == OK) { break; } + + if (status == MPACK_EOF) { + // move remaining data to front of buffer + if (buf_size && ptr > buf) { + memmove(buf, ptr, buf_size); + } + } else if (status != MPACK_OK) { + break; + } + } + + if (status != MPACK_OK) { + typval_parser_error_free(&parser); + emsg_mpack_error(status); } end: - msgpack_unpacker_free(unpacker); - msgpack_unpacked_destroy(&unpacked); + free_block(buf); } static void msgpackparse_unpack_blob(const blob_T *const blob, list_T *const ret_list) @@ -5817,18 +5812,19 @@ static void msgpackparse_unpack_blob(const blob_T *const blob, list_T *const ret if (len == 0) { return; } - msgpack_unpacked unpacked; - msgpack_unpacked_init(&unpacked); - for (size_t offset = 0; offset < (size_t)len;) { - const msgpack_unpack_return result - = msgpack_unpack_next(&unpacked, blob->bv_ga.ga_data, (size_t)len, &offset); - if (msgpackparse_convert_item(unpacked.data, result, ret_list, true) - != OK) { - break; + + const char *data = blob->bv_ga.ga_data; + size_t remaining = (size_t)len; + while (remaining) { + typval_T tv; + int status = unpack_typval(&data, &remaining, &tv); + if (status != MPACK_OK) { + emsg_mpack_error(status); + return; } - } - msgpack_unpacked_destroy(&unpacked); + tv_list_append_owned_tv(ret_list, tv); + } } /// "msgpackparse" function diff --git a/src/nvim/eval/typval.c b/src/nvim/eval/typval.c index c00abe452c..e7b6a0feee 100644 --- a/src/nvim/eval/typval.c +++ b/src/nvim/eval/typval.c @@ -485,13 +485,14 @@ void tv_list_append_tv(list_T *const l, typval_T *const tv) /// Like tv_list_append_tv(), but tv is moved to a list /// /// This means that it is no longer valid to use contents of the typval_T after -/// function exits. -void tv_list_append_owned_tv(list_T *const l, typval_T tv) +/// function exits. A pointer is returned to the allocated typval which can be used +typval_T *tv_list_append_owned_tv(list_T *const l, typval_T tv) FUNC_ATTR_NONNULL_ALL { listitem_T *const li = tv_list_item_alloc(); *TV_LIST_ITEM_TV(li) = tv; tv_list_append(l, li); + return TV_LIST_ITEM_TV(li); } /// Append a list to a list as one item |