1 files changed, 301 insertions, 0 deletions
diff --git a/src/nvim/msgpack_rpc/unpacker.c b/src/nvim/msgpack_rpc/unpacker.c
new file mode 100644
index 0000000000..9db8f314bf
--- /dev/null
+++ b/src/nvim/msgpack_rpc/unpacker.c
@@ -0,0 +1,301 @@
+// This is an open source non-commercial project. Dear PVS-Studio, please check
+// it. PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com
+
+#include "nvim/api/private/helpers.h"
+#include "nvim/log.h"
+#include "nvim/msgpack_rpc/helpers.h"
+#include "nvim/msgpack_rpc/unpacker.h"
+
+#ifdef INCLUDE_GENERATED_DECLARATIONS
+# include "msgpack_rpc/unpacker.c.generated.h"
+#endif
+
+/// Decodes exactly one msgpack object from `data`; `err` is set on any failure.
+Object unpack(const char *data, size_t size, Error *err)
+{
+  Unpacker unpacker;
+  mpack_parser_init(&unpacker.parser, 0);
+  unpacker.parser.data.p = &unpacker;
+  // Empty or truncated input may never reach api_parse_enter(): don't return garbage.
+  unpacker.result = NIL;
+
+  int result = mpack_parse(&unpacker.parser, &data, &size, api_parse_enter, api_parse_exit);
+  if (result == MPACK_NOMEM) {
+    api_set_error(err, kErrorTypeException, "object was too deep to unpack");
+  } else if (result == MPACK_EOF) {
+    api_set_error(err, kErrorTypeException, "incomplete msgpack string");
+  } else if (result == MPACK_ERROR) {
+    api_set_error(err, kErrorTypeException, "invalid msgpack string");
+  } else if (result == MPACK_OK && size) {  // bytes are left after the object
+    api_set_error(err, kErrorTypeException, "trailing data in msgpack string");
+  }
+  return unpacker.result;
+}
+
+/// mpack_parse() "enter" callback: stores the value of `node` in the Object tree rooted
+/// at Unpacker.result (the Unpacker is taken from parser->data.p). node->data[0].p tells
+/// the children of `node` where to write: the Object of an ARRAY, MAP or EXT node, or
+/// the character buffer of a STR or BIN node.
+static void api_parse_enter(mpack_parser_t *parser, mpack_node_t *node)
+{
+  Unpacker *unpacker = parser->data.p;
+  Object *result = NULL;  // destination for the value of `node`
+  String *key_location = NULL;  // destination when `node` is a string key of a map
+
+  mpack_node_t *parent = MPACK_PARENT_NODE(node);
+  if (!parent) {
+    result = &unpacker->result;
+  } else if (parent->tok.type == MPACK_TOKEN_ARRAY) {
+    Object *obj = parent->data[0].p;
+    result = &kv_A(obj->data.array, parent->pos);
+  } else if (parent->tok.type == MPACK_TOKEN_MAP) {
+    Object *obj = parent->data[0].p;
+    KeyValuePair *kv = &kv_A(obj->data.dictionary, parent->pos);
+    if (parent->key_visited) {
+      result = &kv->value;
+    } else if (node->tok.type == MPACK_TOKEN_STR || node->tok.type == MPACK_TOKEN_BIN) {
+      key_location = &kv->key;
+    } else {
+      // msgpack permits keys of any type, but KeyValuePair.key is a String. Leaving
+      // `result` NULL here made every non-string key a NULL dereference below, so
+      // record an empty key and park the decoded key in the not yet used value slot,
+      // which the real value overwrites when it is entered.
+      // NOTE(review): a container parked this way is never freed.
+      kv->key = (String){ .data = NULL, .size = 0 };
+      result = &kv->value;
+    }
+  }  // otherwise `result` stays NULL; the CHUNK case below does not use it
+
+  switch (node->tok.type) {
+  case MPACK_TOKEN_NIL:
+    *result = NIL;
+    break;
+  case MPACK_TOKEN_BOOLEAN:
+    *result = BOOL(mpack_unpack_boolean(node->tok));
+    break;
+  case MPACK_TOKEN_SINT:
+    *result = INTEGER_OBJ(mpack_unpack_sint(node->tok));
+    break;
+  case MPACK_TOKEN_UINT:
+    *result = INTEGER_OBJ((Integer)mpack_unpack_uint(node->tok));
+    break;
+  case MPACK_TOKEN_FLOAT:
+    *result = FLOAT_OBJ(mpack_unpack_float(node->tok));
+    break;
+  case MPACK_TOKEN_BIN:
+  case MPACK_TOKEN_STR: {
+    // Only the buffer is set up here; the CHUNK case below copies the bytes into it.
+    String str = { .data = xmallocz(node->tok.length), .size = node->tok.length };
+    if (key_location) {
+      *key_location = str;
+    } else {
+      *result = STRING_OBJ(str);
+    }
+    node->data[0].p = str.data;
+    break;
+  }
+  case MPACK_TOKEN_EXT:
+    // Decoded by the CHUNK case; stays nil when no chunk completes the payload.
+    *result = NIL;
+    node->data[0].p = result;
+    break;
+
+  case MPACK_TOKEN_CHUNK:
+    if (parent->tok.type == MPACK_TOKEN_STR || parent->tok.type == MPACK_TOKEN_BIN) {
+      char *data = parent->data[0].p;  // parent->pos is used as the write offset
+      memcpy(data + parent->pos, node->tok.data.chunk_ptr, node->tok.length);
+    } else {
+      // EXT payload: gather it in ext_buf and decode it once it is complete.
+      Object *res = parent->data[0].p;
+      size_t endlen = parent->pos + node->tok.length;
+      if (endlen > MAX_EXT_LEN) {
+        *res = NIL;
+        break;
+      }
+      memcpy(unpacker->ext_buf + parent->pos, node->tok.data.chunk_ptr, node->tok.length);
+      if (endlen < parent->tok.length) {
+        break;  // EOF, let's get back to it later
+      }
+      // The payload must start with a msgpack unsigned integer; it goes to data.integer.
+      const char *buf = unpacker->ext_buf;
+      size_t size = parent->tok.length;
+      mpack_token_t ext_tok;
+      int status = mpack_rtoken(&buf, &size, &ext_tok);
+      if (status || ext_tok.type != MPACK_TOKEN_UINT) {
+        // TODO(bfredl): once we fixed memory management, we can set
+        // p->unpack_error and a flag like p->interrupted
+        *res = NIL;
+        break;
+      }
+      int ext_type = parent->tok.data.ext_type;
+      if (ext_type < 0 || ext_type > EXT_OBJECT_TYPE_MAX) {
+        *res = NIL;
+        break;
+      }
+      res->type = (ObjectType)(ext_type + EXT_OBJECT_TYPE_SHIFT);
+      res->data.integer = (int64_t)mpack_unpack_uint(ext_tok);
+    }
+    break;
+
+  case MPACK_TOKEN_ARRAY: {
+    Array arr = KV_INITIAL_VALUE;
+    kv_resize(arr, node->tok.length);
+    kv_size(arr) = node->tok.length;
+    *result = ARRAY_OBJ(arr);
+    node->data[0].p = result;
+    break;
+  }
+  case MPACK_TOKEN_MAP: {
+    Dictionary dict = KV_INITIAL_VALUE;
+    kv_resize(dict, node->tok.length);
+    kv_size(dict) = node->tok.length;
+    *result = DICTIONARY_OBJ(dict);
+    node->data[0].p = result;
+    break;
+  }
+  default:
+    abort();
+  }
+}
+
+static void api_parse_exit(mpack_parser_t *parser, mpack_node_t *node)
+{}  // exit callback for mpack_parse(); intentionally empty, api_parse_enter() does the work
+
+void unpacker_init(Unpacker *p)
+{  // NOTE(review): p->state is not set here; presumably callers zero `p` first -- confirm
+  p->unpack_error = (Error)ERROR_INIT;
+  mpack_tokbuf_init(&p->reader);
+  mpack_parser_init(&p->parser, 0);
+  p->parser.data.p = p;  // api_parse_enter() finds the Unpacker through this pointer
+}
+
+/// Decodes one message header (array head, type, request id and/or method name) from
+/// p->read_ptr and consumes it on success. Returns false without consuming anything if
+/// it is incomplete (retry later) or invalid (sets p->unpack_error and p->state = -1).
+bool unpacker_parse_header(Unpacker *p)
+{
+  mpack_token_t tok;
+  int result;
+  const char *data = p->read_ptr;
+  size_t size = p->read_size;
+  assert(!ERROR_SET(&p->unpack_error));
+
+#define NEXT(tok) \
+  result = mpack_read(&p->reader, &data, &size, &tok); \
+  if (result) { goto error; }
+
+  NEXT(tok);
+  if (tok.type != MPACK_TOKEN_ARRAY || tok.length < 3 || tok.length > 4) {
+    goto error;
+  }
+  size_t array_length = tok.length;
+
+  NEXT(tok);
+  if (tok.type != MPACK_TOKEN_UINT) {
+    goto error;
+  }
+  // Validate the full value: narrowing first would turn e.g. 0x100000002 into 2.
+  uint64_t type = mpack_unpack_uint(tok);
+  if ((array_length == 3) ? type != 2 : (type >= 2)) {
+    goto error;
+  }
+  p->type = (MessageType)type;
+  p->request_id = 0;
+
+  if (p->type != kMessageTypeNotification) {
+    NEXT(tok);
+    // The id must fit the 32-bit field instead of being silently truncated.
+    if (tok.type != MPACK_TOKEN_UINT || mpack_unpack_uint(tok) > UINT32_MAX) {
+      goto error;
+    }
+    p->request_id = (uint32_t)mpack_unpack_uint(tok);
+  }
+
+  if (p->type != kMessageTypeResponse) {
+    NEXT(tok);
+    if ((tok.type != MPACK_TOKEN_STR && tok.type != MPACK_TOKEN_BIN) || tok.length > 100) {
+      goto error;
+    }
+    p->method_name_len = tok.length;
+    if (p->method_name_len > 0) {
+      NEXT(tok);
+      assert(tok.type == MPACK_TOKEN_CHUNK);
+    }
+    if (tok.length < p->method_name_len) {
+      result = MPACK_EOF;  // only part of the name has arrived
+      goto error;
+    }
+    // if this fails, p->handler.fn will be NULL
+    p->handler = msgpack_rpc_get_handler_for(tok.length ? tok.data.chunk_ptr : "",
+                                             tok.length, &p->unpack_error);
+  }
+
+  p->read_ptr = data;
+  p->read_size = size;
+  return true;
+#undef NEXT
+
+error:
+  if (result == MPACK_EOF) {
+    // recover later by retrying from scratch when more data is available.
+    mpack_tokbuf_init(&p->reader);
+  } else {
+    api_set_error(&p->unpack_error, kErrorTypeValidation, "failed to decode msgpack");
+    p->state = -1;
+  }
+  return false;
+}
+
+// BASIC STATE MACHINE
+//
+// With some basic assumptions, we can parse the overall structure of msgpack-rpc
+// messages with a hand-rolled FSM of just 3 states (<x> = p->state):
+//
+// <0>[0, request_id, method_name, <2>args]
+// <0>[1, request_id, <1>err, <2>result]
+// <0>[2, method_name, <2>args]
+//
+// The assumption here is that the header of the message, which we define as the
+// initial array head, the kind integer, request_id and/or method name (when needed),
+// is relatively small, just ~10 bytes + the method name. Thus we can simply refuse
+// to advance the stream beyond the header until it can be parsed in its entirety.
+//
+// Of course, later on, we want to specialize state 2 into sub-states depending
+// on the specific method. "nvim_exec_lua" should just decode directly into Lua
+// objects, and "redraw/grid_line" should use a hand-rolled decoder to avoid
+// a blizzard of small objects for each screen cell.
+
+/// Decodes one message: the header if p->state is 0, then its payload. Returns true when
+/// a message is complete, false if more input is needed or decoding failed (state -1).
+bool unpacker_advance(Unpacker *p)
+{
+  assert(p->state >= 0);
+  if (p->state == 0) {
+    if (!unpacker_parse_header(p)) {
+      return false;
+    }
+    p->state = (p->type == kMessageTypeResponse) ? 1 : 2;
+  }
+
+  for (;;) {
+    int status = mpack_parse(&p->parser, &p->read_ptr, &p->read_size,
+                             api_parse_enter, api_parse_exit);
+    if (status == MPACK_EOF) {
+      return false;
+    }
+    if (status != MPACK_OK) {
+      api_set_error(&p->unpack_error, kErrorTypeValidation, "failed to parse msgpack");
+      p->state = -1;
+      return false;
+    }
+    if (p->state != 1) {
+      break;
+    }
+    // State 1 parsed the error object of a response: keep it, then parse the result.
+    p->error = p->result;
+    p->state = 2;
+  }
+  assert(p->state == 2);
+  p->state = 0;
+  return true;
+}