diff options
Diffstat (limited to 'src/nvim/msgpack_rpc/unpacker.c')
-rw-r--r-- | src/nvim/msgpack_rpc/unpacker.c | 301 |
1 files changed, 301 insertions, 0 deletions
diff --git a/src/nvim/msgpack_rpc/unpacker.c b/src/nvim/msgpack_rpc/unpacker.c new file mode 100644 index 0000000000..9db8f314bf --- /dev/null +++ b/src/nvim/msgpack_rpc/unpacker.c @@ -0,0 +1,301 @@ +// This is an open source non-commercial project. Dear PVS-Studio, please check +// it. PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com + +#include "nvim/api/private/helpers.h" +#include "nvim/log.h" +#include "nvim/msgpack_rpc/helpers.h" +#include "nvim/msgpack_rpc/unpacker.h" + +#ifdef INCLUDE_GENERATED_DECLARATIONS +# include "msgpack_rpc/unpacker.c.generated.h" +#endif + +Object unpack(const char *data, size_t size, Error *err) +{ + Unpacker unpacker; + mpack_parser_init(&unpacker.parser, 0); + unpacker.parser.data.p = &unpacker; + + int result = mpack_parse(&unpacker.parser, &data, &size, + api_parse_enter, api_parse_exit); + + if (result == MPACK_NOMEM) { + api_set_error(err, kErrorTypeException, "object was too deep to unpack"); + } else if (result == MPACK_EOF) { + api_set_error(err, kErrorTypeException, "incomplete msgpack string"); + } else if (result == MPACK_ERROR) { + api_set_error(err, kErrorTypeException, "invalid msgpack string"); + } else if (result == MPACK_OK && size) { + api_set_error(err, kErrorTypeException, "trailing data in msgpack string"); + } + + return unpacker.result; +} + +static void api_parse_enter(mpack_parser_t *parser, mpack_node_t *node) +{ + Unpacker *unpacker = parser->data.p; + Object *result = NULL; + String *key_location = NULL; + + mpack_node_t *parent = MPACK_PARENT_NODE(node); + if (parent) { + switch (parent->tok.type) { + case MPACK_TOKEN_ARRAY: { + Object *obj = parent->data[0].p; + result = &kv_A(obj->data.array, parent->pos); + break; + } + case MPACK_TOKEN_MAP: { + Object *obj = parent->data[0].p; + KeyValuePair *kv = &kv_A(obj->data.dictionary, parent->pos); + if (!parent->key_visited) { + key_location = &kv->key; + } else { + result = &kv->value; + } + break; + } + + default: + break; + } + } else { + result = &unpacker->result; + } + + switch (node->tok.type) { + case MPACK_TOKEN_NIL: + *result = NIL; + break; + case MPACK_TOKEN_BOOLEAN: + *result = BOOL(mpack_unpack_boolean(node->tok)); + break; + case MPACK_TOKEN_SINT: + *result = INTEGER_OBJ(mpack_unpack_sint(node->tok)); + break; + case MPACK_TOKEN_UINT: + *result = INTEGER_OBJ((Integer)mpack_unpack_uint(node->tok)); + break; + case MPACK_TOKEN_FLOAT: + *result = FLOAT_OBJ(mpack_unpack_float(node->tok)); + break; + case MPACK_TOKEN_BIN: + case MPACK_TOKEN_STR: { + String str = { .data = xmallocz(node->tok.length), .size = node->tok.length }; + + if (key_location) { + *key_location = str; + } else { + *result = STRING_OBJ(str); + } + + node->data[0].p = str.data; + break; + } + case MPACK_TOKEN_EXT: + // handled in chunk; but save result location + node->data[0].p = result; + break; + + case MPACK_TOKEN_CHUNK: + if (parent->tok.type == MPACK_TOKEN_STR || parent->tok.type == MPACK_TOKEN_BIN) { + char *data = parent->data[0].p; + memcpy(data + parent->pos, + node->tok.data.chunk_ptr, node->tok.length); + } else { + Object *res = parent->data[0].p; + + size_t endlen = parent->pos + node->tok.length; + if (endlen > MAX_EXT_LEN) { + *res = NIL; + break; + } + memcpy(unpacker->ext_buf + parent->pos, + node->tok.data.chunk_ptr, node->tok.length); + if (parent->pos + node->tok.length < parent->tok.length) { + break; // EOF, let's get back to it later + } + const char *buf = unpacker->ext_buf; + size_t size = parent->tok.length; + mpack_token_t ext_tok; + int status = mpack_rtoken(&buf, &size, &ext_tok); + if (status || ext_tok.type != MPACK_TOKEN_UINT) { + // TODO(bfredl): once we fixed memory management, we can set + // p->unpack_error and a flag like p->interrupted + *res = NIL; + break; + } + int ext_type = parent->tok.data.ext_type; + if (0 <= ext_type && ext_type <= EXT_OBJECT_TYPE_MAX) { + res->type = (ObjectType)(ext_type + EXT_OBJECT_TYPE_SHIFT); + res->data.integer = (int64_t)mpack_unpack_uint(ext_tok); + } else { + *res = NIL; + break; + } + } + break; + + case MPACK_TOKEN_ARRAY: { + Array arr = KV_INITIAL_VALUE; + kv_resize(arr, node->tok.length); + kv_size(arr) = node->tok.length; + *result = ARRAY_OBJ(arr); + node->data[0].p = result; + break; + } + case MPACK_TOKEN_MAP: { + Dictionary dict = KV_INITIAL_VALUE; + kv_resize(dict, node->tok.length); + kv_size(dict) = node->tok.length; + *result = DICTIONARY_OBJ(dict); + node->data[0].p = result; + break; + } + default: + abort(); + } +} + +static void api_parse_exit(mpack_parser_t *parser, mpack_node_t *node) +{} + +void unpacker_init(Unpacker *p) +{ + mpack_parser_init(&p->parser, 0); + p->parser.data.p = p; + mpack_tokbuf_init(&p->reader); + p->unpack_error = (Error)ERROR_INIT; +} + +bool unpacker_parse_header(Unpacker *p) +{ + mpack_token_t tok; + int result; + + const char *data = p->read_ptr; + size_t size = p->read_size; + + assert(!ERROR_SET(&p->unpack_error)); + +#define NEXT(tok) \ + result = mpack_read(&p->reader, &data, &size, &tok); \ + if (result) { goto error; } + + NEXT(tok); + if (tok.type != MPACK_TOKEN_ARRAY || tok.length < 3 || tok.length > 4) { + goto error; + } + size_t array_length = tok.length; + + NEXT(tok); + if (tok.type != MPACK_TOKEN_UINT) { + goto error; + } + uint32_t type = (uint32_t)mpack_unpack_uint(tok); + if ((array_length == 3) ? type != 2 : (type >= 2)) { + goto error; + } + p->type = (MessageType)type; + p->request_id = 0; + + if (p->type != kMessageTypeNotification) { + NEXT(tok); + if (tok.type != MPACK_TOKEN_UINT) { + goto error; + } + p->request_id = (uint32_t)mpack_unpack_uint(tok); + } + + if (p->type != kMessageTypeResponse) { + NEXT(tok); + if ((tok.type != MPACK_TOKEN_STR && tok.type != MPACK_TOKEN_BIN) + || tok.length > 100) { + goto error; + } + p->method_name_len = tok.length; + + if (p->method_name_len > 0) { + NEXT(tok); + assert(tok.type == MPACK_TOKEN_CHUNK); + } + if (tok.length < p->method_name_len) { + result = MPACK_EOF; + goto error; + } + // if this fails, p->handler.fn will be NULL + p->handler = msgpack_rpc_get_handler_for(tok.length ? tok.data.chunk_ptr : "", + tok.length, &p->unpack_error); + } + + p->read_ptr = data; + p->read_size = size; + return true; +#undef NEXT + +error: + if (result == MPACK_EOF) { + // recover later by retrying from scratch + // when more data is available. + mpack_tokbuf_init(&p->reader); + } else { + api_set_error(&p->unpack_error, kErrorTypeValidation, "failed to decode msgpack"); + p->state = -1; + } + return false; +} + +// BASIC BITCH STATE MACHINE +// +// With some basic assumptions, we can parse the overall structure of msgpack-rpc +// messages with a hand-rolled FSM of just 3 states (<x> = p->state): +// +// <0>[0, request_id, method_name, <2>args] +// <0>[1, request_id, <1>err, <2>result] +// <0>[2, method_name, <2>args] +// +// The assumption here is that the header of the message, which we define as the +// initial array head, the kind integer, request_id and/or method name (when needed), +// is relatively small, just ~10 bytes + the method name. Thus we can simply refuse +// to advance the stream beyond the header until it can be parsed in its entirety. +// +// Of course, later on, we want to specialize state 2 into sub-states depending +// on the specific method. "nvim_exec_lua" should just decode direct into lua +// objects, and "redraw/grid_line" should use a hand-rolled decoder to avoid +// a blizzard of small objects for each screen cell. + +bool unpacker_advance(Unpacker *p) +{ + assert(p->state >= 0); + if (p->state == 0) { + if (!unpacker_parse_header(p)) { + return false; + } + p->state = p->type == kMessageTypeResponse ? 1 : 2; + } + + int result; + +rerun: + result = mpack_parse(&p->parser, &p->read_ptr, &p->read_size, + api_parse_enter, api_parse_exit); + + if (result == MPACK_EOF) { + return false; + } else if (result != MPACK_OK) { + api_set_error(&p->unpack_error, kErrorTypeValidation, "failed to parse msgpack"); + p->state = -1; + return false; + } + + if (p->state == 1) { + p->error = p->result; + p->state = 2; + goto rerun; + } else { + assert(p->state == 2); + p->state = 0; + } + return true; +} |