#include #include #include #include #include #include #include "klib/kvec.h" #include "mpack/conv.h" #include "mpack/mpack_core.h" #include "mpack/object.h" #include "nvim/ascii_defs.h" #include "nvim/charset.h" #include "nvim/eval.h" #include "nvim/eval/decode.h" #include "nvim/eval/encode.h" #include "nvim/eval/typval.h" #include "nvim/eval/typval_defs.h" #include "nvim/eval_defs.h" #include "nvim/garray.h" #include "nvim/gettext_defs.h" #include "nvim/macros_defs.h" #include "nvim/mbyte.h" #include "nvim/memory.h" #include "nvim/message.h" #include "nvim/vim_defs.h" /// Helper structure for container_struct typedef struct { size_t stack_index; ///< Index of current container in stack. list_T *special_val; ///< _VAL key contents for special maps. ///< When container is not a special dictionary it is ///< NULL. const char *s; ///< Location where container starts. typval_T container; ///< Container. Either VAR_LIST, VAR_DICT or VAR_LIST ///< which is _VAL from special dictionary. } ContainerStackItem; /// Helper structure for values struct typedef struct { bool is_special_string; ///< Indicates that current value is a special ///< dictionary with string. bool didcomma; ///< True if previous token was comma. bool didcolon; ///< True if previous token was colon. typval_T val; ///< Actual value. } ValuesStackItem; /// Vector containing values not yet saved in any container typedef kvec_t(ValuesStackItem) ValuesStack; /// Vector containing containers, each next container is located inside previous typedef kvec_t(ContainerStackItem) ContainerStack; #ifdef INCLUDE_GENERATED_DECLARATIONS # include "eval/decode.c.generated.h" #endif /// Create special dictionary /// /// @param[out] rettv Location where created dictionary will be saved. /// @param[in] type Type of the dictionary. /// @param[in] val Value associated with the _VAL key. 
static inline void create_special_dict(typval_T *const rettv, const MessagePackType type,
                                       typval_T val)
  FUNC_ATTR_NONNULL_ALL
{
  dict_T *const special = tv_dict_alloc();

  // `_TYPE`: a reference to the shared list that identifies the kind of value.
  dictitem_T *const type_item = tv_dict_item_alloc_len(S_LEN("_TYPE"));
  list_T *const type_list = (list_T *)eval_msgpack_type_lists[type];
  type_item->di_tv.v_type = VAR_LIST;
  type_item->di_tv.v_lock = VAR_UNLOCKED;
  type_item->di_tv.vval.v_list = type_list;
  tv_list_ref(type_list);
  tv_dict_add(special, type_item);

  // `_VAL`: the payload, stored as passed in by the caller.
  dictitem_T *const val_item = tv_dict_item_alloc_len(S_LEN("_VAL"));
  val_item->di_tv = val;
  tv_dict_add(special, val_item);

  // The reference taken here is the one held through `rettv`.
  special->dv_refcount++;
  rettv->v_type = VAR_DICT;
  rettv->v_lock = VAR_UNLOCKED;
  rettv->vval.v_dict = special;
}

#define DICT_LEN(dict) (dict)->dv_hashtab.ht_used

/// Helper function used for working with stack vectors used by JSON decoder
///
/// Takes a freshly decoded (or just closed) object and either parks it on the
/// values stack or stores it into the innermost open container.
///
/// @param[in,out]  obj  New object. Will either be put into the stack (and,
///                      probably, also inside container) or freed.
/// @param[out]  stack  Object stack.
/// @param[out]  container_stack  Container objects stack.
/// @param[in,out]  pp  Position in string which is currently being parsed. Used
///                     for error reporting and is also set when decoding is
///                     restarted due to the necessity of converting regular
///                     dictionary to a special map.
/// @param[out]  next_map_special  Is set to true when dictionary needs to be
///                                converted to a special map, otherwise not
///                                touched. Indicates that decoding has been
///                                restarted.
/// @param[out]  didcomma  True if previous token was comma. Is set to recorded
///                        value when decoder is restarted, otherwise unused.
/// @param[out]  didcolon  True if previous token was colon. Is set to recorded
///                        value when decoder is restarted, otherwise unused.
///
/// @return OK in case of success, FAIL in case of error.
static inline int json_decoder_pop(ValuesStackItem obj, ValuesStack *const stack, ContainerStack *const container_stack, const char **const pp, bool *const next_map_special, bool *const didcomma, bool *const didcolon) FUNC_ATTR_NONNULL_ALL { if (kv_size(*container_stack) == 0) { kv_push(*stack, obj); return OK; } ContainerStackItem last_container = kv_last(*container_stack); const char *val_location = *pp; if (obj.val.v_type == last_container.container.v_type // vval.v_list and vval.v_dict should have the same size and offset && ((void *)obj.val.vval.v_list == (void *)last_container.container.vval.v_list)) { (void)kv_pop(*container_stack); val_location = last_container.s; last_container = kv_last(*container_stack); } if (last_container.container.v_type == VAR_LIST) { if (tv_list_len(last_container.container.vval.v_list) != 0 && !obj.didcomma) { semsg(_("E474: Expected comma before list item: %s"), val_location); tv_clear(&obj.val); return FAIL; } assert(last_container.special_val == NULL); tv_list_append_owned_tv(last_container.container.vval.v_list, obj.val); } else if (last_container.stack_index == kv_size(*stack) - 2) { if (!obj.didcolon) { semsg(_("E474: Expected colon before dictionary value: %s"), val_location); tv_clear(&obj.val); return FAIL; } ValuesStackItem key = kv_pop(*stack); if (last_container.special_val == NULL) { // These cases should have already been handled. 
assert(!(key.is_special_string || key.val.vval.v_string == NULL)); dictitem_T *const obj_di = tv_dict_item_alloc(key.val.vval.v_string); tv_clear(&key.val); if (tv_dict_add(last_container.container.vval.v_dict, obj_di) == FAIL) { abort(); } obj_di->di_tv = obj.val; } else { list_T *const kv_pair = tv_list_alloc(2); tv_list_append_list(last_container.special_val, kv_pair); tv_list_append_owned_tv(kv_pair, key.val); tv_list_append_owned_tv(kv_pair, obj.val); } } else { // Object with key only if (!obj.is_special_string && obj.val.v_type != VAR_STRING) { semsg(_("E474: Expected string key: %s"), *pp); tv_clear(&obj.val); return FAIL; } else if (!obj.didcomma && (last_container.special_val == NULL && (DICT_LEN(last_container.container.vval.v_dict) != 0))) { semsg(_("E474: Expected comma before dictionary key: %s"), val_location); tv_clear(&obj.val); return FAIL; } // Handle special dictionaries if (last_container.special_val == NULL && (obj.is_special_string || obj.val.vval.v_string == NULL || tv_dict_find(last_container.container.vval.v_dict, obj.val.vval.v_string, -1))) { tv_clear(&obj.val); // Restart (void)kv_pop(*container_stack); ValuesStackItem last_container_val = kv_A(*stack, last_container.stack_index); while (kv_size(*stack) > last_container.stack_index) { tv_clear(&(kv_pop(*stack).val)); } *pp = last_container.s; *didcomma = last_container_val.didcomma; *didcolon = last_container_val.didcolon; *next_map_special = true; return OK; } kv_push(*stack, obj); } return OK; } #define LENP(p, e) \ ((int)((e) - (p))), (p) #define OBJ(obj_tv, is_sp_string, didcomma_, didcolon_) \ ((ValuesStackItem) { \ .is_special_string = (is_sp_string), \ .val = (obj_tv), \ .didcomma = (didcomma_), \ .didcolon = (didcolon_), \ }) #define POP(obj_tv, is_sp_string) \ do { \ if (json_decoder_pop(OBJ(obj_tv, is_sp_string, *didcomma, *didcolon), \ stack, container_stack, \ &p, next_map_special, didcomma, didcolon) \ == FAIL) { \ goto parse_json_string_fail; \ } \ if (*next_map_special) { 
\ goto parse_json_string_ret; \ } \ } while (0) /// Create a new special dictionary that ought to represent a MAP /// /// @param[out] ret_tv Address where new special dictionary is saved. /// @param[in] len Expected number of items to be populated before list /// becomes accessible from Vimscript. It is still valid to /// underpopulate a list, value only controls how many elements /// will be allocated in advance. @see ListLenSpecials. /// /// @return [allocated] list which should contain key-value pairs. Return value /// may be safely ignored. list_T *decode_create_map_special_dict(typval_T *const ret_tv, const ptrdiff_t len) FUNC_ATTR_NONNULL_ALL { list_T *const list = tv_list_alloc(len); tv_list_ref(list); create_special_dict(ret_tv, kMPMap, ((typval_T) { .v_type = VAR_LIST, .v_lock = VAR_UNLOCKED, .vval = { .v_list = list }, })); return list; } /// Convert char* string to typval_T /// /// Depending on whether string has (no) NUL bytes, it may use a special /// dictionary, VAR_BLOB, or decode string to VAR_STRING. /// /// @param[in] s String to decode. /// @param[in] len String length. /// @param[in] force_blob whether string always should be decoded as a blob, /// or only when embedded NUL bytes were present /// @param[in] s_allocated If true, then `s` was allocated and can be saved in /// a returned structure. If it is not saved there, it /// will be freed. /// /// @return Decoded string. 
typval_T decode_string(const char *const s, const size_t len, bool force_blob,
                       const bool s_allocated)
  FUNC_ATTR_WARN_UNUSED_RESULT
{
  assert(s != NULL || len == 0);

  if (!force_blob && (s == NULL || memchr(s, NUL, len) == NULL)) {
    // Plain string: reuse the caller's buffer when it is NULL or was handed
    // over, otherwise take a NUL-terminated copy.
    char *str;
    if (s == NULL || s_allocated) {
      str = (char *)s;
    } else {
      str = xmemdupz(s, len);
    }
    return (typval_T) {
      .v_type = VAR_STRING,
      .v_lock = VAR_UNLOCKED,
      .vval = { .v_string = str },
    };
  }

  // Blob: either requested explicitly or required by an embedded NUL byte.
  typval_T blob_tv;
  blob_tv.v_lock = VAR_UNLOCKED;
  blob_T *const blob = tv_blob_alloc_ret(&blob_tv);
  if (!s_allocated) {
    ga_concat_len(&blob->bv_ga, s, len);
    return blob_tv;
  }
  // The allocated buffer becomes the blob's storage.
  blob->bv_ga.ga_data = (void *)s;
  blob->bv_ga.ga_len = (int)len;
  blob->bv_ga.ga_maxlen = (int)len;
  return blob_tv;
}

/// Parse JSON double-quoted string
///
/// Works in two passes: the first validates the string and computes an upper
/// bound for the decoded size, the second decodes escapes into a newly
/// allocated buffer.
///
/// @param[in]  buf  Buffer being converted.
/// @param[in]  buf_len  Length of the buffer.
/// @param[in,out]  pp  Pointer to the start of the string. Must point to '"'.
///                     Is advanced to the closing '"'. Also see
///                     json_decoder_pop(), it may set pp to another location
///                     and alter next_map_special, didcomma and didcolon.
/// @param[out]  stack  Object stack.
/// @param[out]  container_stack  Container objects stack.
/// @param[out]  next_map_special  Is set to true when dictionary is converted
///                                to a special map, otherwise not touched.
/// @param[out]  didcomma  True if previous token was comma. Is set to recorded
///                        value when decoder is restarted, otherwise unused.
/// @param[out]  didcolon  True if previous token was colon. Is set to recorded
///                        value when decoder is restarted, otherwise unused.
///
/// @return OK in case of success, FAIL in case of error.
static inline int parse_json_string(const char *const buf, const size_t buf_len,
                                    const char **const pp, ValuesStack *const stack,
                                    ContainerStack *const container_stack,
                                    bool *const next_map_special, bool *const didcomma,
                                    bool *const didcolon)
  FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_ALWAYS_INLINE
{
  const char *const e = buf + buf_len;
  const char *p = *pp;
  size_t needed = 0;  // Upper bound for the size of the decoded string.
  const char *const body = ++p;  // First byte after the opening quote.
  int ret = OK;

  // First pass: validate and measure.
  while (p < e && *p != '"') {
    if (*p != '\\') {
      const uint8_t p_byte = (uint8_t)(*p);
      // unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
      if (p_byte < 0x20) {
        semsg(_("E474: ASCII control characters cannot be present "
                "inside string: %.*s"), LENP(p, e));
        goto parse_json_string_fail;
      }
      const int ch = utf_ptr2char(p);
      // All characters above U+007F are encoded using two or more bytes
      // and thus cannot possibly be equal to *p. But utf_ptr2char({0xFF,
      // 0}) will return 0xFF, even though 0xFF cannot start any UTF-8
      // code point at all.
      //
      // The only exception is U+00C3 which is represented as 0xC3 0x83.
      if (ch >= 0x80 && p_byte == ch
          && !(ch == 0xC3 && p + 1 < e && (uint8_t)p[1] == 0x83)) {
        semsg(_("E474: Only UTF-8 strings allowed: %.*s"), LENP(p, e));
        goto parse_json_string_fail;
      }
      if (ch > 0x10FFFF) {
        semsg(_("E474: Only UTF-8 code points up to U+10FFFF "
                "are allowed to appear unescaped: %.*s"), LENP(p, e));
        goto parse_json_string_fail;
      }
      const size_t nbytes = (size_t)utf_char2len(ch);
      assert(nbytes == (size_t)(ch ? utf_ptr2len(p) : 1));
      needed += nbytes;
      p += nbytes;
      continue;
    }

    // Escape sequence: step onto the character following the backslash.
    p++;
    if (p == e) {
      semsg(_("E474: Unfinished escape sequence: %.*s"),
            (int)buf_len, buf);
      goto parse_json_string_fail;
    }
    switch (*p) {
    case 'u':
      if (p + 4 >= e) {
        semsg(_("E474: Unfinished unicode escape sequence: %.*s"),
              (int)buf_len, buf);
        goto parse_json_string_fail;
      }
      if (!ascii_isxdigit(p[1])
          || !ascii_isxdigit(p[2])
          || !ascii_isxdigit(p[3])
          || !ascii_isxdigit(p[4])) {
        semsg(_("E474: Expected four hex digits after \\u: %.*s"),
              LENP(p - 1, e));
        goto parse_json_string_fail;
      }
      // One UTF-8 character below U+10000 can take up to 3 bytes,
      // above up to 6, but they are encoded using two \u escapes.
      needed += 3;
      p += 5;
      break;
    case '\\':
    case '/':
    case '"':
    case 't':
    case 'b':
    case 'n':
    case 'r':
    case 'f':
      needed++;
      p++;
      break;
    default:
      semsg(_("E474: Unknown escape sequence: %.*s"), LENP(p - 1, e));
      goto parse_json_string_fail;
    }
  }
  if (p == e || *p != '"') {
    semsg(_("E474: Expected string end: %.*s"), (int)buf_len, buf);
    goto parse_json_string_fail;
  }

  // Second pass: decode into a buffer of the measured size.
  char *decoded = xmalloc(needed + 1);
  int fst_in_pair = 0;  // Pending high surrogate, zero when there is none.
  char *out = decoded;
#define PUT_FST_IN_PAIR(fst_in_pair, str_end) \
  do { \
    if ((fst_in_pair) != 0) { \
      (str_end) += utf_char2bytes(fst_in_pair, (str_end)); \
      (fst_in_pair) = 0; \
    } \
  } while (0)
  for (const char *src = body; src < p; src++) {
    // Anything other than a \u escape ends a possible surrogate pair, so a
    // pending high surrogate is written out on its own.
    if (src[0] != '\\' || src[1] != 'u') {
      PUT_FST_IN_PAIR(fst_in_pair, out);
    }
    if (*src != '\\') {
      *out++ = *src;
      continue;
    }
    src++;
    switch (*src) {
    case 'u': {
      const char ubuf[] = { src[1], src[2], src[3], src[4] };
      src += 4;
      uvarnumber_T ch;
      vim_str2nr(ubuf, NULL, NULL,
                 STR2NR_HEX | STR2NR_FORCE, NULL, &ch, 4, true, NULL);
      if (SURROGATE_HI_START <= ch && ch <= SURROGATE_HI_END) {
        // High surrogate: remember it, it may be completed by the next \u.
        PUT_FST_IN_PAIR(fst_in_pair, out);
        fst_in_pair = (int)ch;
      } else if (SURROGATE_LO_START <= ch && ch <= SURROGATE_LO_END
                 && fst_in_pair != 0) {
        // Low surrogate completing a pair: emit the combined code point.
        const int codepoint = ((int)(ch - SURROGATE_LO_START)
                               + ((fst_in_pair - SURROGATE_HI_START) << 10)
                               + SURROGATE_FIRST_CHAR);
        out += utf_char2bytes(codepoint, out);
        fst_in_pair = 0;
      } else {
        PUT_FST_IN_PAIR(fst_in_pair, out);
        out += utf_char2bytes((int)ch, out);
      }
      break;
    }
    case '\\':
    case '/':
    case '"':
    case 't':
    case 'b':
    case 'n':
    case 'r':
    case 'f': {
      static const char escapes[] = {
        ['\\'] = '\\',
        ['/'] = '/',
        ['"'] = '"',
        ['t'] = TAB,
        ['b'] = BS,
        ['n'] = NL,
        ['r'] = CAR,
        ['f'] = FF,
      };
      *out++ = escapes[(int)(*src)];
      break;
    }
    default:
      // Unknown escapes were rejected by the first pass.
      abort();
    }
  }
  PUT_FST_IN_PAIR(fst_in_pair, out);
#undef PUT_FST_IN_PAIR
  *out = NUL;

  // The buffer is handed over to the resulting value. A result which is not
  // a VAR_STRING is flagged as a special string.
  typval_T obj = decode_string(decoded, (size_t)(out - decoded), false, true);
  POP(obj, obj.v_type != VAR_STRING);
  goto parse_json_string_ret;
parse_json_string_fail:
  ret = FAIL;
parse_json_string_ret:
  *pp = p;
  return ret;
}

#undef POP

/// Parse JSON number: both floating-point and integer
///
/// Number format: `-?\d+(?:.\d+)?(?:[eE][+-]?\d+)?`.
///
/// @param[in]  buf  Buffer being converted.
/// @param[in]  buf_len  Length of the buffer.
/// @param[in,out]  pp  Pointer to the start of the number. Must point to
///                     a digit or a minus sign. Is advanced to the last
///                     character of the number. Also see json_decoder_pop(), it
///                     may set pp to another location and alter
///                     next_map_special, didcomma and didcolon.
/// @param[out]  stack  Object stack.
/// @param[out]  container_stack  Container objects stack.
/// @param[out]  next_map_special  Is set to true when dictionary is converted
///                                to a special map, otherwise not touched.
/// @param[out]  didcomma  True if previous token was comma. Is set to recorded
///                        value when decoder is restarted, otherwise unused.
/// @param[out]  didcolon  True if previous token was colon. Is set to recorded
///                        value when decoder is restarted, otherwise unused.
///
/// @return OK in case of success, FAIL in case of error.
static inline int parse_json_number(const char *const buf, const size_t buf_len,
                                    const char **const pp, ValuesStack *const stack,
                                    ContainerStack *const container_stack,
                                    bool *const next_map_special, bool *const didcomma,
                                    bool *const didcolon)
  FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_ALWAYS_INLINE
{
  const char *const e = buf + buf_len;
  const char *p = *pp;
  int ret = OK;
  const char *const num_start = p;
  const char *frac_digits = NULL;  // First byte after the decimal dot.
  const char *exp_digits = NULL;   // First byte after the exponent sign.
  const char *exp_start = NULL;    // First byte after 'e' or 'E'.

  if (*p == '-') {
    p++;
  }
  const char *const int_digits = p;  // First byte after the optional minus.
  if (p >= e) {
    goto parse_json_number_check;
  }

  // Integer part.
  while (p < e && ascii_isdigit(*p)) {
    p++;
  }
  if (p != int_digits + 1 && *int_digits == '0') {
    semsg(_("E474: Leading zeroes are not allowed: %.*s"), LENP(num_start, e));
    goto parse_json_number_fail;
  }
  if (p >= e || p == int_digits) {
    goto parse_json_number_check;
  }

  // Optional fraction.
  if (*p == '.') {
    p++;
    frac_digits = p;
    while (p < e && ascii_isdigit(*p)) {
      p++;
    }
    if (p >= e || p == frac_digits) {
      goto parse_json_number_check;
    }
  }

  // Optional exponent.
  if (*p == 'e' || *p == 'E') {
    p++;
    exp_start = p;
    if (p < e && (*p == '-' || *p == '+')) {
      p++;
    }
    exp_digits = p;
    while (p < e && ascii_isdigit(*p)) {
      p++;
    }
  }

parse_json_number_check:
  // `p` is now one past the last byte accepted as part of the number. A part
  // which was started but has no digits is detected by `p` (or the exponent
  // start) sitting right where that part's digits were expected.
  if (p == int_digits) {
    semsg(_("E474: Missing number after minus sign: %.*s"), LENP(num_start, e));
    goto parse_json_number_fail;
  }
  if (p == frac_digits || (frac_digits != NULL && exp_start == frac_digits + 1)) {
    semsg(_("E474: Missing number after decimal dot: %.*s"), LENP(num_start, e));
    goto parse_json_number_fail;
  }
  if (p == exp_digits) {
    semsg(_("E474: Missing exponent: %.*s"), LENP(num_start, e));
    goto parse_json_number_fail;
  }

  typval_T tv = {
    .v_type = VAR_NUMBER,
    .v_lock = VAR_UNLOCKED,
  };
  const size_t exp_num_len = (size_t)(p - num_start);
  if (frac_digits || exp_digits) {
    // Convert floating-point number
    const size_t num_len = string2float(num_start, &tv.vval.v_float);
    // A mismatch is reported, but does not make the parse fail.
    if (exp_num_len != num_len) {
      semsg(_("E685: internal error: while converting number \"%.*s\" "
              "to float string2float consumed %zu bytes in place of %zu"),
            (int)exp_num_len, num_start, num_len, exp_num_len);
    }
    tv.v_type = VAR_FLOAT;
  } else {
    // Convert integer
    varnumber_T nr;
    int num_len;
    vim_str2nr(num_start, NULL, &num_len, 0, &nr, NULL, (int)(p - num_start), true, NULL);
    // A mismatch is reported, but does not make the parse fail.
    if ((int)exp_num_len != num_len) {
      semsg(_("E685: internal error: while converting number \"%.*s\" "
              "to integer vim_str2nr consumed %i bytes in place of %zu"),
            (int)exp_num_len, num_start, num_len, exp_num_len);
    }
    tv.vval.v_number = nr;
  }

  if (json_decoder_pop(OBJ(tv, false, *didcomma, *didcolon),
                       stack, container_stack,
                       &p, next_map_special, didcomma, didcolon) == FAIL) {
    goto parse_json_number_fail;
  }
  if (!*next_map_special) {
    // Leave `p` on the last character of the number. On restart `p` was set
    // by json_decoder_pop() and is returned untouched.
    p--;
  }
  goto parse_json_number_ret;
parse_json_number_fail:
  ret = FAIL;
parse_json_number_ret:
  *pp = p;
  return ret;
}

#define POP(obj_tv, is_sp_string) \
  do { \
    if (json_decoder_pop(OBJ(obj_tv, is_sp_string, didcomma, didcolon), \
                         &stack, &container_stack, \
                         &p, &next_map_special, &didcomma, &didcolon) \
        == FAIL) { \
      goto json_decode_string_fail; \
    } \
    if (next_map_special) { \
      goto json_decode_string_cycle_start; \
    } \
  } while (0)

/// Convert JSON string into Vimscript object
///
/// @param[in]  buf  String to convert. UTF-8 encoding is assumed.
/// @param[in]  buf_len  Length of the string.
/// @param[out]  rettv  Location where to save results. Is set to VAR_UNKNOWN
///                     before decoding starts.
///
/// @return OK in case of success, FAIL otherwise.
int json_decode_string(const char *const buf, const size_t buf_len, typval_T *const rettv) FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT { const char *p = buf; const char *const e = buf + buf_len; while (p < e && (*p == ' ' || *p == TAB || *p == NL || *p == CAR)) { p++; } if (p == e) { emsg(_("E474: Attempt to decode a blank string")); return FAIL; } int ret = OK; ValuesStack stack = KV_INITIAL_VALUE; ContainerStack container_stack = KV_INITIAL_VALUE; rettv->v_type = VAR_UNKNOWN; bool didcomma = false; bool didcolon = false; bool next_map_special = false; for (; p < e; p++) { json_decode_string_cycle_start: assert(*p == '{' || next_map_special == false); switch (*p) { case '}': case ']': { if (kv_size(container_stack) == 0) { semsg(_("E474: No container to close: %.*s"), LENP(p, e)); goto json_decode_string_fail; } ContainerStackItem last_container = kv_last(container_stack); if (*p == '}' && last_container.container.v_type != VAR_DICT) { semsg(_("E474: Closing list with curly bracket: %.*s"), LENP(p, e)); goto json_decode_string_fail; } else if (*p == ']' && last_container.container.v_type != VAR_LIST) { semsg(_("E474: Closing dictionary with square bracket: %.*s"), LENP(p, e)); goto json_decode_string_fail; } else if (didcomma) { semsg(_("E474: Trailing comma: %.*s"), LENP(p, e)); goto json_decode_string_fail; } else if (didcolon) { semsg(_("E474: Expected value after colon: %.*s"), LENP(p, e)); goto json_decode_string_fail; } else if (last_container.stack_index != kv_size(stack) - 1) { assert(last_container.stack_index < kv_size(stack) - 1); semsg(_("E474: Expected value: %.*s"), LENP(p, e)); goto json_decode_string_fail; } if (kv_size(stack) == 1) { p++; (void)kv_pop(container_stack); goto json_decode_string_after_cycle; } else { if (json_decoder_pop(kv_pop(stack), &stack, &container_stack, &p, &next_map_special, &didcomma, &didcolon) == FAIL) { goto json_decode_string_fail; } assert(!next_map_special); break; } } case ',': { if (kv_size(container_stack) 
== 0) { semsg(_("E474: Comma not inside container: %.*s"), LENP(p, e)); goto json_decode_string_fail; } ContainerStackItem last_container = kv_last(container_stack); if (didcomma) { semsg(_("E474: Duplicate comma: %.*s"), LENP(p, e)); goto json_decode_string_fail; } else if (didcolon) { semsg(_("E474: Comma after colon: %.*s"), LENP(p, e)); goto json_decode_string_fail; } else if (last_container.container.v_type == VAR_DICT && last_container.stack_index != kv_size(stack) - 1) { semsg(_("E474: Using comma in place of colon: %.*s"), LENP(p, e)); goto json_decode_string_fail; } else if (last_container.special_val == NULL ? (last_container.container.v_type == VAR_DICT ? (DICT_LEN(last_container.container.vval.v_dict) == 0) : (tv_list_len(last_container.container.vval.v_list) == 0)) : (tv_list_len(last_container.special_val) == 0)) { semsg(_("E474: Leading comma: %.*s"), LENP(p, e)); goto json_decode_string_fail; } didcomma = true; continue; } case ':': { if (kv_size(container_stack) == 0) { semsg(_("E474: Colon not inside container: %.*s"), LENP(p, e)); goto json_decode_string_fail; } ContainerStackItem last_container = kv_last(container_stack); if (last_container.container.v_type != VAR_DICT) { semsg(_("E474: Using colon not in dictionary: %.*s"), LENP(p, e)); goto json_decode_string_fail; } else if (last_container.stack_index != kv_size(stack) - 2) { semsg(_("E474: Unexpected colon: %.*s"), LENP(p, e)); goto json_decode_string_fail; } else if (didcomma) { semsg(_("E474: Colon after comma: %.*s"), LENP(p, e)); goto json_decode_string_fail; } else if (didcolon) { semsg(_("E474: Duplicate colon: %.*s"), LENP(p, e)); goto json_decode_string_fail; } didcolon = true; continue; } case ' ': case TAB: case NL: case CAR: continue; case 'n': if ((p + 3) >= e || strncmp(p + 1, "ull", 3) != 0) { semsg(_("E474: Expected null: %.*s"), LENP(p, e)); goto json_decode_string_fail; } p += 3; POP(((typval_T) { .v_type = VAR_SPECIAL, .v_lock = VAR_UNLOCKED, .vval = { .v_special = 
kSpecialVarNull }, }), false); break; case 't': if ((p + 3) >= e || strncmp(p + 1, "rue", 3) != 0) { semsg(_("E474: Expected true: %.*s"), LENP(p, e)); goto json_decode_string_fail; } p += 3; POP(((typval_T) { .v_type = VAR_BOOL, .v_lock = VAR_UNLOCKED, .vval = { .v_bool = kBoolVarTrue }, }), false); break; case 'f': if ((p + 4) >= e || strncmp(p + 1, "alse", 4) != 0) { semsg(_("E474: Expected false: %.*s"), LENP(p, e)); goto json_decode_string_fail; } p += 4; POP(((typval_T) { .v_type = VAR_BOOL, .v_lock = VAR_UNLOCKED, .vval = { .v_bool = kBoolVarFalse }, }), false); break; case '"': if (parse_json_string(buf, buf_len, &p, &stack, &container_stack, &next_map_special, &didcomma, &didcolon) == FAIL) { // Error message was already given goto json_decode_string_fail; } if (next_map_special) { goto json_decode_string_cycle_start; } break; case '-': case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': if (parse_json_number(buf, buf_len, &p, &stack, &container_stack, &next_map_special, &didcomma, &didcolon) == FAIL) { // Error message was already given goto json_decode_string_fail; } if (next_map_special) { goto json_decode_string_cycle_start; } break; case '[': { list_T *list = tv_list_alloc(kListLenMayKnow); tv_list_ref(list); typval_T tv = { .v_type = VAR_LIST, .v_lock = VAR_UNLOCKED, .vval = { .v_list = list }, }; kv_push(container_stack, ((ContainerStackItem) { .stack_index = kv_size(stack), .s = p, .container = tv, .special_val = NULL })); kv_push(stack, OBJ(tv, false, didcomma, didcolon)); break; } case '{': { typval_T tv; list_T *val_list = NULL; if (next_map_special) { next_map_special = false; val_list = decode_create_map_special_dict(&tv, kListLenMayKnow); } else { dict_T *dict = tv_dict_alloc(); dict->dv_refcount++; tv = (typval_T) { .v_type = VAR_DICT, .v_lock = VAR_UNLOCKED, .vval = { .v_dict = dict }, }; } kv_push(container_stack, ((ContainerStackItem) { .stack_index = kv_size(stack), .s = p, .container = tv, 
.special_val = val_list })); kv_push(stack, OBJ(tv, false, didcomma, didcolon)); break; } default: semsg(_("E474: Unidentified byte: %.*s"), LENP(p, e)); goto json_decode_string_fail; } didcomma = false; didcolon = false; if (kv_size(container_stack) == 0) { p++; break; } } json_decode_string_after_cycle: for (; p < e; p++) { switch (*p) { case NL: case ' ': case TAB: case CAR: break; default: semsg(_("E474: Trailing characters: %.*s"), LENP(p, e)); goto json_decode_string_fail; } } if (kv_size(stack) == 1 && kv_size(container_stack) == 0) { *rettv = kv_pop(stack).val; goto json_decode_string_ret; } semsg(_("E474: Unexpected end of input: %.*s"), (int)buf_len, buf); json_decode_string_fail: ret = FAIL; while (kv_size(stack)) { tv_clear(&(kv_pop(stack).val)); } json_decode_string_ret: kv_destroy(stack); kv_destroy(container_stack); return ret; } #undef LENP #undef POP #undef OBJ #undef DICT_LEN static void positive_integer_to_special_typval(typval_T *rettv, uint64_t val) { if (val <= VARNUMBER_MAX) { *rettv = (typval_T) { .v_type = VAR_NUMBER, .v_lock = VAR_UNLOCKED, .vval = { .v_number = (varnumber_T)val }, }; } else { list_T *const list = tv_list_alloc(4); tv_list_ref(list); create_special_dict(rettv, kMPInteger, ((typval_T) { .v_type = VAR_LIST, .v_lock = VAR_UNLOCKED, .vval = { .v_list = list }, })); tv_list_append_number(list, 1); tv_list_append_number(list, (varnumber_T)((val >> 62) & 0x3)); tv_list_append_number(list, (varnumber_T)((val >> 31) & 0x7FFFFFFF)); tv_list_append_number(list, (varnumber_T)(val & 0x7FFFFFFF)); } } static void typval_parse_enter(mpack_parser_t *parser, mpack_node_t *node) { typval_T *result = NULL; mpack_node_t *parent = MPACK_PARENT_NODE(node); if (parent) { switch (parent->tok.type) { case MPACK_TOKEN_ARRAY: { list_T *list = parent->data[1].p; result = tv_list_append_owned_tv(list, (typval_T) { .v_type = VAR_UNKNOWN }); break; } case MPACK_TOKEN_MAP: { typval_T(*items)[2] = parent->data[1].p; result = 
&items[parent->pos][parent->key_visited]; break; } case MPACK_TOKEN_STR: case MPACK_TOKEN_BIN: case MPACK_TOKEN_EXT: assert(node->tok.type == MPACK_TOKEN_CHUNK); break; default: abort(); } } else { result = parser->data.p; } // for types that are completed in typval_parse_exit node->data[0].p = result; node->data[1].p = NULL; // free on error if non-NULL switch (node->tok.type) { case MPACK_TOKEN_NIL: *result = (typval_T) { .v_type = VAR_SPECIAL, .v_lock = VAR_UNLOCKED, .vval = { .v_special = kSpecialVarNull }, }; break; case MPACK_TOKEN_BOOLEAN: *result = (typval_T) { .v_type = VAR_BOOL, .v_lock = VAR_UNLOCKED, .vval = { .v_bool = mpack_unpack_boolean(node->tok) ? kBoolVarTrue : kBoolVarFalse }, }; break; case MPACK_TOKEN_SINT: { *result = (typval_T) { .v_type = VAR_NUMBER, .v_lock = VAR_UNLOCKED, .vval = { .v_number = mpack_unpack_sint(node->tok) }, }; break; } case MPACK_TOKEN_UINT: positive_integer_to_special_typval(result, mpack_unpack_uint(node->tok)); break; case MPACK_TOKEN_FLOAT: *result = (typval_T) { .v_type = VAR_FLOAT, .v_lock = VAR_UNLOCKED, .vval = { .v_float = mpack_unpack_float(node->tok) }, }; break; case MPACK_TOKEN_BIN: case MPACK_TOKEN_STR: case MPACK_TOKEN_EXT: // actually converted in typval_parse_exit after the data chunks node->data[1].p = xmallocz(node->tok.length); break; case MPACK_TOKEN_CHUNK: { char *data = parent->data[1].p; memcpy(data + parent->pos, node->tok.data.chunk_ptr, node->tok.length); break; } case MPACK_TOKEN_ARRAY: { list_T *const list = tv_list_alloc((ptrdiff_t)node->tok.length); tv_list_ref(list); *result = (typval_T) { .v_type = VAR_LIST, .v_lock = VAR_UNLOCKED, .vval = { .v_list = list }, }; node->data[1].p = list; break; } case MPACK_TOKEN_MAP: // we don't know if this will be safe to convert to a typval dict yet node->data[1].p = xmallocz(node->tok.length * 2 * sizeof(typval_T)); break; } } /// Free node which was entered but never exited, due to a nested error /// /// Don't bother with typvals as these will be GC:d 
eventually void typval_parser_error_free(mpack_parser_t *parser) { for (uint32_t i = 0; i < parser->size; i++) { mpack_node_t *node = &parser->items[i]; switch (node->tok.type) { case MPACK_TOKEN_BIN: case MPACK_TOKEN_STR: case MPACK_TOKEN_EXT: case MPACK_TOKEN_MAP: XFREE_CLEAR(node->data[1].p); break; default: break; } } } static void typval_parse_exit(mpack_parser_t *parser, mpack_node_t *node) { typval_T *result = node->data[0].p; switch (node->tok.type) { case MPACK_TOKEN_BIN: case MPACK_TOKEN_STR: *result = decode_string(node->data[1].p, node->tok.length, false, true); node->data[1].p = NULL; break; case MPACK_TOKEN_EXT: { list_T *const list = tv_list_alloc(2); tv_list_ref(list); tv_list_append_number(list, node->tok.data.ext_type); list_T *const ext_val_list = tv_list_alloc(kListLenMayKnow); tv_list_append_list(list, ext_val_list); create_special_dict(result, kMPExt, ((typval_T) { .v_type = VAR_LIST, .v_lock = VAR_UNLOCKED, .vval = { .v_list = list } })); // TODO(bfredl): why not use BLOB? 
encode_list_write((void *)ext_val_list, node->data[1].p, node->tok.length); XFREE_CLEAR(node->data[1].p); } break; case MPACK_TOKEN_MAP: { typval_T(*items)[2] = node->data[1].p; for (size_t i = 0; i < node->tok.length; i++) { typval_T *key = &items[i][0]; if (key->v_type != VAR_STRING || key->vval.v_string == NULL || key->vval.v_string[0] == NUL) { goto msgpack_to_vim_generic_map; } } dict_T *const dict = tv_dict_alloc(); dict->dv_refcount++; *result = (typval_T) { .v_type = VAR_DICT, .v_lock = VAR_UNLOCKED, .vval = { .v_dict = dict }, }; for (size_t i = 0; i < node->tok.length; i++) { char *key = items[i][0].vval.v_string; size_t keylen = strlen(key); dictitem_T *const di = xmallocz(offsetof(dictitem_T, di_key) + keylen); memcpy(&di->di_key[0], key, keylen); di->di_tv.v_type = VAR_UNKNOWN; if (tv_dict_add(dict, di) == FAIL) { // Duplicate key: fallback to generic map TV_DICT_ITER(dict, d, { d->di_tv.v_type = VAR_SPECIAL; // don't free values in tv_clear(), they will be reused d->di_tv.vval.v_special = kSpecialVarNull; }); tv_clear(result); xfree(di); goto msgpack_to_vim_generic_map; } di->di_tv = items[i][1]; } for (size_t i = 0; i < node->tok.length; i++) { xfree(items[i][0].vval.v_string); } XFREE_CLEAR(node->data[1].p); break; msgpack_to_vim_generic_map: {} list_T *const list = decode_create_map_special_dict(result, node->tok.length); for (size_t i = 0; i < node->tok.length; i++) { list_T *const kv_pair = tv_list_alloc(2); tv_list_append_list(list, kv_pair); tv_list_append_owned_tv(kv_pair, items[i][0]); tv_list_append_owned_tv(kv_pair, items[i][1]); } XFREE_CLEAR(node->data[1].p); break; } default: // other kinds are handled completely in typval_parse_enter break; } } int mpack_parse_typval(mpack_parser_t *parser, const char **data, size_t *size) { return mpack_parse(parser, data, size, typval_parse_enter, typval_parse_exit); } int unpack_typval(const char **data, size_t *size, typval_T *ret) { ret->v_type = VAR_UNKNOWN; mpack_parser_t parser; 
mpack_parser_init(&parser, 0); parser.data.p = ret; int status = mpack_parse_typval(&parser, data, size); if (status != MPACK_OK) { typval_parser_error_free(&parser); tv_clear(ret); } return status; }