From 41b44d114c030e01a7e15084d0510555ec363605 Mon Sep 17 00:00:00 2001 From: ZyX Date: Wed, 3 Feb 2016 18:50:19 +0300 Subject: eval: Move encode.c to eval/encode.c --- src/nvim/eval/encode.c | 1292 ++++++++++++++++++++++++++++++++++++++++++++++++ src/nvim/eval/encode.h | 60 +++ 2 files changed, 1352 insertions(+) create mode 100644 src/nvim/eval/encode.c create mode 100644 src/nvim/eval/encode.h (limited to 'src/nvim/eval') diff --git a/src/nvim/eval/encode.c b/src/nvim/eval/encode.c new file mode 100644 index 0000000000..e23e68dc62 --- /dev/null +++ b/src/nvim/eval/encode.c @@ -0,0 +1,1292 @@ +/// @file encode.c +/// +/// File containing functions for encoding and decoding VimL values. +/// +/// Split out from eval.c. + +#include +#include +#include + +#include "nvim/eval/encode.h" +#include "nvim/buffer_defs.h" // vimconv_T +#include "nvim/eval.h" +#include "nvim/eval_defs.h" +#include "nvim/garray.h" +#include "nvim/mbyte.h" +#include "nvim/message.h" +#include "nvim/charset.h" // vim_isprintc() +#include "nvim/macros.h" +#include "nvim/ascii.h" +#include "nvim/vim.h" // For _() +#include "nvim/lib/kvec.h" + +#define ga_concat(a, b) ga_concat(a, (char_u *)b) +#define utf_ptr2char(b) utf_ptr2char((char_u *)b) +#define utf_ptr2len(b) ((size_t)utf_ptr2len((char_u *)b)) +#define utf_char2len(b) ((size_t)utf_char2len(b)) +#define string_convert(a, b, c) \ + ((char *)string_convert((vimconv_T *)a, (char_u *)b, c)) +#define convert_setup(vcp, from, to) \ + (convert_setup(vcp, (char_u *)from, (char_u *)to)) + +/// Structure representing current VimL to messagepack conversion state +typedef struct { + enum { + kMPConvDict, ///< Convert dict_T *dictionary. + kMPConvList, ///< Convert list_T *list. + kMPConvPairs, ///< Convert mapping represented as a list_T* of pairs. + } type; + union { + struct { + dict_T *dict; ///< Currently converted dictionary. + hashitem_T *hi; ///< Currently converted dictionary item. + size_t todo; ///< Amount of items left to process. 
+ } d; ///< State of dictionary conversion. + struct { + list_T *list; ///< Currently converted list. + listitem_T *li; ///< Currently converted list item. + } l; ///< State of list or generic mapping conversion. + } data; ///< Data to convert. +} MPConvStackVal; + +/// Stack used to convert VimL values to messagepack. +typedef kvec_t(MPConvStackVal) MPConvStack; + +const char *const encode_special_var_names[] = { + [kSpecialVarNull] = "null", + [kSpecialVarNone] = "none", + [kSpecialVarTrue] = "true", + [kSpecialVarFalse] = "false", +}; + +#ifdef INCLUDE_GENERATED_DECLARATIONS +# include "eval/encode.c.generated.h" +#endif + +/// Msgpack callback for writing to readfile()-style list +int msgpack_list_write(void *data, const char *buf, size_t len) +{ + if (len == 0) { + return 0; + } + list_T *const list = (list_T *) data; + const char *const end = buf + len; + const char *line_end = buf; + if (list->lv_last == NULL) { + list_append_string(list, NULL, 0); + } + listitem_T *li = list->lv_last; + do { + const char *line_start = line_end; + line_end = xmemscan(line_start, NL, (size_t) (end - line_start)); + if (line_end == line_start) { + list_append_allocated_string(list, NULL); + } else { + const size_t line_length = (size_t) (line_end - line_start); + char *str; + if (li == NULL) { + str = xmemdupz(line_start, line_length); + } else { + const size_t li_len = (li->li_tv.vval.v_string == NULL + ? 
0 + : STRLEN(li->li_tv.vval.v_string)); + li->li_tv.vval.v_string = xrealloc(li->li_tv.vval.v_string, + li_len + line_length + 1); + str = (char *) li->li_tv.vval.v_string + li_len; + memmove(str, line_start, line_length); + str[line_length] = 0; + } + for (size_t i = 0; i < line_length; i++) { + if (str[i] == NUL) { + str[i] = NL; + } + } + if (li == NULL) { + list_append_allocated_string(list, str); + } else { + li = NULL; + } + if (line_end == end - 1) { + list_append_allocated_string(list, NULL); + } + } + line_end++; + } while (line_end < end); + return 0; +} + +/// Abort conversion to string after a recursion error. +static bool did_echo_string_emsg = false; + +/// Show a error message when converting to msgpack value +/// +/// @param[in] msg Error message to dump. Must contain exactly two %s that +/// will be replaced with what was being dumped: first with +/// something like “F” or “function argument”, second with path +/// to the failed value. +/// @param[in] mpstack Path to the failed value. +/// @param[in] objname Dumped object name. +/// +/// @return FAIL. +static int conv_error(const char *const msg, const MPConvStack *const mpstack, + const char *const objname) + FUNC_ATTR_NONNULL_ALL +{ + garray_T msg_ga; + ga_init(&msg_ga, (int)sizeof(char), 80); + char *const key_msg = _("key %s"); + char *const key_pair_msg = _("key %s at index %i from special map"); + char *const idx_msg = _("index %i"); + for (size_t i = 0; i < kv_size(*mpstack); i++) { + if (i != 0) { + ga_concat(&msg_ga, ", "); + } + MPConvStackVal v = kv_A(*mpstack, i); + switch (v.type) { + case kMPConvDict: { + typval_T key_tv = { + .v_type = VAR_STRING, + .vval = { .v_string = (v.data.d.hi == NULL + ? 
v.data.d.dict->dv_hashtab.ht_array + : (v.data.d.hi - 1))->hi_key }, + }; + char *const key = encode_tv2string(&key_tv, NULL); + vim_snprintf((char *) IObuff, IOSIZE, key_msg, key); + xfree(key); + ga_concat(&msg_ga, IObuff); + break; + } + case kMPConvPairs: + case kMPConvList: { + int idx = 0; + const listitem_T *li; + for (li = v.data.l.list->lv_first; + li != NULL && li->li_next != v.data.l.li; + li = li->li_next) { + idx++; + } + if (v.type == kMPConvList + || li == NULL + || (li->li_tv.v_type != VAR_LIST + && li->li_tv.vval.v_list->lv_len <= 0)) { + vim_snprintf((char *) IObuff, IOSIZE, idx_msg, idx); + ga_concat(&msg_ga, IObuff); + } else { + typval_T key_tv = li->li_tv.vval.v_list->lv_first->li_tv; + char *const key = encode_tv2echo(&key_tv, NULL); + vim_snprintf((char *) IObuff, IOSIZE, key_pair_msg, key, idx); + xfree(key); + ga_concat(&msg_ga, IObuff); + } + break; + } + } + } + EMSG3(msg, objname, (kv_size(*mpstack) == 0 + ? _("itself") + : (char *) msg_ga.ga_data)); + ga_clear(&msg_ga); + return FAIL; +} + +/// Convert readfile()-style list to a char * buffer with length +/// +/// @param[in] list Converted list. +/// @param[out] ret_len Resulting buffer length. +/// @param[out] ret_buf Allocated buffer with the result or NULL if ret_len is +/// zero. +/// +/// @return true in case of success, false in case of failure. 
+bool encode_vim_list_to_buf(const list_T *const list, size_t *const ret_len, + char **const ret_buf) + FUNC_ATTR_NONNULL_ARG(2,3) FUNC_ATTR_WARN_UNUSED_RESULT +{ + size_t len = 0; + if (list != NULL) { + for (const listitem_T *li = list->lv_first; + li != NULL; + li = li->li_next) { + if (li->li_tv.v_type != VAR_STRING) { + return false; + } + len++; + if (li->li_tv.vval.v_string != 0) { + len += STRLEN(li->li_tv.vval.v_string); + } + } + if (len) { + len--; + } + } + *ret_len = len; + if (len == 0) { + *ret_buf = NULL; + return true; + } + ListReaderState lrstate = encode_init_lrstate(list); + char *const buf = xmalloc(len); + size_t read_bytes; + if (encode_read_from_list(&lrstate, buf, len, &read_bytes) != OK) { + assert(false); + } + assert(len == read_bytes); + *ret_buf = buf; + return true; +} + +/// Read bytes from list +/// +/// @param[in,out] state Structure describing position in list from which +/// reading should start. Is updated to reflect position +/// at which reading ended. +/// @param[out] buf Buffer to write to. +/// @param[in] nbuf Buffer length. +/// @param[out] read_bytes Is set to amount of bytes read. +/// +/// @return OK when reading was finished, FAIL in case of error (i.e. list item +/// was not a string), NOTDONE if reading was successfull, but there are +/// more bytes to read. +int encode_read_from_list(ListReaderState *const state, char *const buf, + const size_t nbuf, size_t *const read_bytes) + FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT +{ + char *const buf_end = buf + nbuf; + char *p = buf; + while (p < buf_end) { + for (size_t i = state->offset; i < state->li_length && p < buf_end; i++) { + const char ch = (char) state->li->li_tv.vval.v_string[state->offset++]; + *p++ = (ch == NL ? 
NUL : ch); + } + if (p < buf_end) { + state->li = state->li->li_next; + if (state->li == NULL) { + *read_bytes = (size_t) (p - buf); + return OK; + } + *p++ = NL; + if (state->li->li_tv.v_type != VAR_STRING) { + *read_bytes = (size_t) (p - buf); + return FAIL; + } + state->offset = 0; + state->li_length = (state->li->li_tv.vval.v_string == NULL + ? 0 + : STRLEN(state->li->li_tv.vval.v_string)); + } + } + *read_bytes = nbuf; + return (state->offset < state->li_length || state->li->li_next != NULL + ? NOTDONE + : OK); +} + +/// Code for checking whether container references itself +/// +/// @param[in,out] val Container to check. +/// @param copyID_attr Name of the container attribute that holds copyID. +/// After checking whether value of this attribute is +/// copyID (variable) it is set to copyID. +#define CHECK_SELF_REFERENCE(val, copyID_attr, conv_type) \ + do { \ + if ((val)->copyID_attr == copyID) { \ + CONV_RECURSE((val), conv_type); \ + } \ + (val)->copyID_attr = copyID; \ + } while (0) + +/// Define functions which convert VimL value to something else +/// +/// Creates function `vim_to_{name}(firstargtype firstargname, typval_T *const +/// tv)` which returns OK or FAIL and helper functions. +/// +/// @param firstargtype Type of the first argument. It will be used to return +/// the results. +/// @param firstargname Name of the first argument. +/// @param name Name of the target converter. 
+#define DEFINE_VIML_CONV_FUNCTIONS(scope, name, firstargtype, firstargname) \ +static int name##_convert_one_value(firstargtype firstargname, \ + MPConvStack *const mpstack, \ + typval_T *const tv, \ + const int copyID, \ + const char *const objname) \ + FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT \ +{ \ + switch (tv->v_type) { \ + case VAR_STRING: { \ + CONV_STRING(tv->vval.v_string, STRLEN(tv->vval.v_string)); \ + break; \ + } \ + case VAR_NUMBER: { \ + CONV_NUMBER(tv->vval.v_number); \ + break; \ + } \ + case VAR_FLOAT: { \ + CONV_FLOAT(tv->vval.v_float); \ + break; \ + } \ + case VAR_FUNC: { \ + CONV_FUNC(tv->vval.v_string); \ + break; \ + } \ + case VAR_LIST: { \ + if (tv->vval.v_list == NULL || tv->vval.v_list->lv_len == 0) { \ + CONV_EMPTY_LIST(); \ + break; \ + } \ + CHECK_SELF_REFERENCE(tv->vval.v_list, lv_copyID, kMPConvList); \ + CONV_LIST_START(tv->vval.v_list); \ + kv_push( \ + MPConvStackVal, \ + *mpstack, \ + ((MPConvStackVal) { \ + .type = kMPConvList, \ + .data = { \ + .l = { \ + .list = tv->vval.v_list, \ + .li = tv->vval.v_list->lv_first, \ + }, \ + }, \ + })); \ + break; \ + } \ + case VAR_SPECIAL: { \ + switch (tv->vval.v_special) { \ + case kSpecialVarNull: { \ + CONV_NIL(); \ + break; \ + } \ + case kSpecialVarTrue: \ + case kSpecialVarFalse: { \ + CONV_BOOL(tv->vval.v_special == kSpecialVarTrue); \ + break; \ + } \ + case kSpecialVarNone: { \ + CONV_NONE_VAL(); \ + break; \ + } \ + } \ + break; \ + } \ + case VAR_DICT: { \ + if (tv->vval.v_dict == NULL \ + || tv->vval.v_dict->dv_hashtab.ht_used == 0) { \ + CONV_EMPTY_DICT(); \ + break; \ + } \ + const dictitem_T *type_di; \ + const dictitem_T *val_di; \ + if (CONV_ALLOW_SPECIAL \ + && tv->vval.v_dict->dv_hashtab.ht_used == 2 \ + && (type_di = dict_find((dict_T *) tv->vval.v_dict, \ + (char_u *) "_TYPE", -1)) != NULL \ + && type_di->di_tv.v_type == VAR_LIST \ + && (val_di = dict_find((dict_T *) tv->vval.v_dict, \ + (char_u *) "_VAL", -1)) != NULL) { \ + size_t i; \ + for (i = 0; i < 
ARRAY_SIZE(eval_msgpack_type_lists); i++) { \ + if (type_di->di_tv.vval.v_list == eval_msgpack_type_lists[i]) { \ + break; \ + } \ + } \ + if (i == ARRAY_SIZE(eval_msgpack_type_lists)) { \ + goto name##_convert_one_value_regular_dict; \ + } \ + switch ((MessagePackType) i) { \ + case kMPNil: { \ + CONV_NIL(); \ + break; \ + } \ + case kMPBoolean: { \ + if (val_di->di_tv.v_type != VAR_NUMBER) { \ + goto name##_convert_one_value_regular_dict; \ + } \ + CONV_BOOL(val_di->di_tv.vval.v_number); \ + break; \ + } \ + case kMPInteger: { \ + const list_T *val_list; \ + varnumber_T sign; \ + varnumber_T highest_bits; \ + varnumber_T high_bits; \ + varnumber_T low_bits; \ + /* List of 4 integers; first is signed (should be 1 or -1, but */ \ + /* this is not checked), second is unsigned and have at most */ \ + /* one (sign is -1) or two (sign is 1) non-zero bits (number of */ \ + /* bits is not checked), other unsigned and have at most 31 */ \ + /* non-zero bits (number of bits is not checked).*/ \ + if (val_di->di_tv.v_type != VAR_LIST \ + || (val_list = val_di->di_tv.vval.v_list) == NULL \ + || val_list->lv_len != 4 \ + || val_list->lv_first->li_tv.v_type != VAR_NUMBER \ + || (sign = val_list->lv_first->li_tv.vval.v_number) == 0 \ + || val_list->lv_first->li_next->li_tv.v_type != VAR_NUMBER \ + || (highest_bits = \ + val_list->lv_first->li_next->li_tv.vval.v_number) < 0 \ + || val_list->lv_last->li_prev->li_tv.v_type != VAR_NUMBER \ + || (high_bits = \ + val_list->lv_last->li_prev->li_tv.vval.v_number) < 0 \ + || val_list->lv_last->li_tv.v_type != VAR_NUMBER \ + || (low_bits = val_list->lv_last->li_tv.vval.v_number) < 0) { \ + goto name##_convert_one_value_regular_dict; \ + } \ + uint64_t number = ((uint64_t) (((uint64_t) highest_bits) << 62) \ + | (uint64_t) (((uint64_t) high_bits) << 31) \ + | (uint64_t) low_bits); \ + if (sign > 0) { \ + CONV_UNSIGNED_NUMBER(number); \ + } else { \ + CONV_NUMBER(-number); \ + } \ + break; \ + } \ + case kMPFloat: { \ + if 
(val_di->di_tv.v_type != VAR_FLOAT) { \ + goto name##_convert_one_value_regular_dict; \ + } \ + CONV_FLOAT(val_di->di_tv.vval.v_float); \ + break; \ + } \ + case kMPString: \ + case kMPBinary: { \ + const bool is_string = ((MessagePackType) i == kMPString); \ + if (val_di->di_tv.v_type != VAR_LIST) { \ + goto name##_convert_one_value_regular_dict; \ + } \ + size_t len; \ + char *buf; \ + if (!encode_vim_list_to_buf(val_di->di_tv.vval.v_list, &len, \ + &buf)) { \ + goto name##_convert_one_value_regular_dict; \ + } \ + if (is_string) { \ + CONV_STR_STRING(buf, len); \ + } else { \ + CONV_STRING(buf, len); \ + } \ + xfree(buf); \ + break; \ + } \ + case kMPArray: { \ + if (val_di->di_tv.v_type != VAR_LIST) { \ + goto name##_convert_one_value_regular_dict; \ + } \ + CHECK_SELF_REFERENCE(val_di->di_tv.vval.v_list, lv_copyID, \ + kMPConvList); \ + CONV_LIST_START(val_di->di_tv.vval.v_list); \ + kv_push(MPConvStackVal, *mpstack, ((MPConvStackVal) { \ + .type = kMPConvList, \ + .data = { \ + .l = { \ + .list = val_di->di_tv.vval.v_list, \ + .li = val_di->di_tv.vval.v_list->lv_first, \ + }, \ + }, \ + })); \ + break; \ + } \ + case kMPMap: { \ + if (val_di->di_tv.v_type != VAR_LIST) { \ + goto name##_convert_one_value_regular_dict; \ + } \ + list_T *const val_list = val_di->di_tv.vval.v_list; \ + if (val_list == NULL || val_list->lv_len == 0) { \ + CONV_EMPTY_DICT(); \ + break; \ + } \ + for (const listitem_T *li = val_list->lv_first; li != NULL; \ + li = li->li_next) { \ + if (li->li_tv.v_type != VAR_LIST \ + || li->li_tv.vval.v_list->lv_len != 2) { \ + goto name##_convert_one_value_regular_dict; \ + } \ + } \ + CHECK_SELF_REFERENCE(val_list, lv_copyID, kMPConvPairs); \ + CONV_DICT_START(val_list->lv_len); \ + kv_push(MPConvStackVal, *mpstack, ((MPConvStackVal) { \ + .type = kMPConvPairs, \ + .data = { \ + .l = { \ + .list = val_list, \ + .li = val_list->lv_first, \ + }, \ + }, \ + })); \ + break; \ + } \ + case kMPExt: { \ + const list_T *val_list; \ + varnumber_T type; \ 
+ if (val_di->di_tv.v_type != VAR_LIST \ + || (val_list = val_di->di_tv.vval.v_list) == NULL \ + || val_list->lv_len != 2 \ + || (val_list->lv_first->li_tv.v_type != VAR_NUMBER) \ + || (type = val_list->lv_first->li_tv.vval.v_number) > INT8_MAX \ + || type < INT8_MIN \ + || (val_list->lv_last->li_tv.v_type != VAR_LIST)) { \ + goto name##_convert_one_value_regular_dict; \ + } \ + size_t len; \ + char *buf; \ + if (!encode_vim_list_to_buf(val_list->lv_last->li_tv.vval.v_list, \ + &len, &buf)) { \ + goto name##_convert_one_value_regular_dict; \ + } \ + CONV_EXT_STRING(buf, len, type); \ + xfree(buf); \ + break; \ + } \ + } \ + break; \ + } \ +name##_convert_one_value_regular_dict: \ + CHECK_SELF_REFERENCE(tv->vval.v_dict, dv_copyID, kMPConvDict); \ + CONV_DICT_START(tv->vval.v_dict->dv_hashtab.ht_used); \ + kv_push(MPConvStackVal, *mpstack, ((MPConvStackVal) { \ + .type = kMPConvDict, \ + .data = { \ + .d = { \ + .dict = tv->vval.v_dict, \ + .hi = tv->vval.v_dict->dv_hashtab.ht_array, \ + .todo = tv->vval.v_dict->dv_hashtab.ht_used, \ + }, \ + }, \ + })); \ + break; \ + } \ + case VAR_UNKNOWN: { \ + EMSG2(_(e_intern2), #name "_convert_one_value()"); \ + return FAIL; \ + } \ + } \ + return OK; \ +} \ +\ +scope int encode_vim_to_##name(firstargtype firstargname, typval_T *const tv, \ + const char *const objname) \ + FUNC_ATTR_WARN_UNUSED_RESULT \ +{ \ + const int copyID = get_copyID(); \ + MPConvStack mpstack; \ + kv_init(mpstack); \ + if (name##_convert_one_value(firstargname, &mpstack, tv, copyID, objname) \ + == FAIL) { \ + goto encode_vim_to_##name##_error_ret; \ + } \ + while (kv_size(mpstack)) { \ + MPConvStackVal *cur_mpsv = &kv_A(mpstack, kv_size(mpstack) - 1); \ + typval_T *cur_tv = NULL; \ + switch (cur_mpsv->type) { \ + case kMPConvDict: { \ + if (!cur_mpsv->data.d.todo) { \ + (void) kv_pop(mpstack); \ + cur_mpsv->data.d.dict->dv_copyID = copyID - 1; \ + CONV_DICT_END(); \ + continue; \ + } else if (cur_mpsv->data.d.todo \ + != 
cur_mpsv->data.d.dict->dv_hashtab.ht_used) { \ + CONV_DICT_BETWEEN_ITEMS(); \ + } \ + while (HASHITEM_EMPTY(cur_mpsv->data.d.hi)) { \ + cur_mpsv->data.d.hi++; \ + } \ + dictitem_T *const di = HI2DI(cur_mpsv->data.d.hi); \ + cur_mpsv->data.d.todo--; \ + cur_mpsv->data.d.hi++; \ + CONV_STR_STRING(&di->di_key[0], STRLEN(&di->di_key[0])); \ + CONV_DICT_AFTER_KEY(); \ + cur_tv = &di->di_tv; \ + break; \ + } \ + case kMPConvList: { \ + if (cur_mpsv->data.l.li == NULL) { \ + (void) kv_pop(mpstack); \ + cur_mpsv->data.l.list->lv_copyID = copyID - 1; \ + CONV_LIST_END(cur_mpsv->data.l.list); \ + continue; \ + } else if (cur_mpsv->data.l.li != cur_mpsv->data.l.list->lv_first) { \ + CONV_LIST_BETWEEN_ITEMS(); \ + } \ + cur_tv = &cur_mpsv->data.l.li->li_tv; \ + cur_mpsv->data.l.li = cur_mpsv->data.l.li->li_next; \ + break; \ + } \ + case kMPConvPairs: { \ + if (cur_mpsv->data.l.li == NULL) { \ + (void) kv_pop(mpstack); \ + cur_mpsv->data.l.list->lv_copyID = copyID - 1; \ + CONV_DICT_END(); \ + continue; \ + } else if (cur_mpsv->data.l.li != cur_mpsv->data.l.list->lv_first) { \ + CONV_DICT_BETWEEN_ITEMS(); \ + } \ + const list_T *const kv_pair = cur_mpsv->data.l.li->li_tv.vval.v_list; \ + CONV_SPECIAL_DICT_KEY_CHECK(kv_pair); \ + if (name##_convert_one_value(firstargname, &mpstack, \ + &kv_pair->lv_first->li_tv, copyID, \ + objname) == FAIL) { \ + goto encode_vim_to_##name##_error_ret; \ + } \ + CONV_DICT_AFTER_KEY(); \ + cur_tv = &kv_pair->lv_last->li_tv; \ + cur_mpsv->data.l.li = cur_mpsv->data.l.li->li_next; \ + break; \ + } \ + } \ + assert(cur_tv != NULL); \ + if (name##_convert_one_value(firstargname, &mpstack, cur_tv, copyID, \ + objname) == FAIL) { \ + goto encode_vim_to_##name##_error_ret; \ + } \ + } \ + kv_destroy(mpstack); \ + return OK; \ +encode_vim_to_##name##_error_ret: \ + kv_destroy(mpstack); \ + return FAIL; \ +} + +#define CONV_STRING(buf, len) \ + do { \ + const char *const buf_ = (const char *) buf; \ + if (buf == NULL) { \ + ga_concat(gap, "''"); \ + } 
else { \ + const size_t len_ = (len); \ + size_t num_quotes = 0; \ + for (size_t i = 0; i < len_; i++) { \ + if (buf_[i] == '\'') { \ + num_quotes++; \ + } \ + } \ + ga_grow(gap, (int) (2 + len_ + num_quotes)); \ + ga_append(gap, '\''); \ + for (size_t i = 0; i < len_; i++) { \ + if (buf_[i] == '\'') { \ + num_quotes++; \ + ga_append(gap, '\''); \ + } \ + ga_append(gap, buf_[i]); \ + } \ + ga_append(gap, '\''); \ + } \ + } while (0) + +#define CONV_STR_STRING(buf, len) \ + CONV_STRING(buf, len) + +#define CONV_EXT_STRING(buf, len, type) + +#define CONV_NUMBER(num) \ + do { \ + char numbuf[NUMBUFLEN]; \ + vim_snprintf(numbuf, NUMBUFLEN - 1, "%" PRId64, (int64_t) (num)); \ + ga_concat(gap, numbuf); \ + } while (0) + +#define CONV_FLOAT(flt) \ + do { \ + const float_T flt_ = (flt); \ + switch (fpclassify(flt_)) { \ + case FP_NAN: { \ + ga_concat(gap, (char_u *) "str2float('nan')"); \ + break; \ + } \ + case FP_INFINITE: { \ + if (flt_ < 0) { \ + ga_append(gap, '-'); \ + } \ + ga_concat(gap, (char_u *) "str2float('inf')"); \ + break; \ + } \ + default: { \ + char numbuf[NUMBUFLEN]; \ + vim_snprintf(numbuf, NUMBUFLEN - 1, "%g", flt_); \ + ga_concat(gap, (char_u *) numbuf); \ + } \ + } \ + } while (0) + +#define CONV_FUNC(fun) \ + do { \ + ga_concat(gap, "function("); \ + CONV_STRING(fun, STRLEN(fun)); \ + ga_append(gap, ')'); \ + } while (0) + +#define CONV_EMPTY_LIST() \ + ga_concat(gap, "[]") + +#define CONV_LIST_START(lst) \ + ga_append(gap, '[') + +#define CONV_EMPTY_DICT() \ + ga_concat(gap, "{}") + +#define CONV_NIL() \ + ga_concat(gap, "v:null") + +#define CONV_BOOL(num) \ + ga_concat(gap, ((num)? 
"v:true": "v:false")) + +#define CONV_NONE_VAL() \ + ga_concat(gap, "v:none") + +#define CONV_UNSIGNED_NUMBER(num) + +#define CONV_DICT_START(len) \ + ga_append(gap, '{') + +#define CONV_DICT_END() \ + ga_append(gap, '}') + +#define CONV_DICT_AFTER_KEY() \ + ga_concat(gap, ": ") + +#define CONV_DICT_BETWEEN_ITEMS() \ + ga_concat(gap, ", ") + +#define CONV_SPECIAL_DICT_KEY_CHECK(kv_pair) + +#define CONV_LIST_END(lst) \ + ga_append(gap, ']') + +#define CONV_LIST_BETWEEN_ITEMS() \ + CONV_DICT_BETWEEN_ITEMS() + +#define CONV_RECURSE(val, conv_type) \ + do { \ + if (!did_echo_string_emsg) { \ + /* Only give this message once for a recursive call to avoid */ \ + /* flooding the user with errors. */ \ + did_echo_string_emsg = true; \ + EMSG(_("E724: unable to correctly dump variable " \ + "with self-referencing container")); \ + } \ + char ebuf[NUMBUFLEN + 7]; \ + size_t backref = 0; \ + for (; backref < kv_size(*mpstack); backref++) { \ + const MPConvStackVal mpval = kv_a(MPConvStackVal, *mpstack, backref); \ + if (mpval.type == conv_type) { \ + if (conv_type == kMPConvDict) { \ + if ((void *) mpval.data.d.dict == (void *) (val)) { \ + break; \ + } \ + } else if (conv_type == kMPConvList) { \ + if ((void *) mpval.data.l.list == (void *) (val)) { \ + break; \ + } \ + } \ + } \ + } \ + vim_snprintf(ebuf, NUMBUFLEN + 6, "{E724@%zu}", backref); \ + ga_concat(gap, &ebuf[0]); \ + return OK; \ + } while (0) + +#define CONV_ALLOW_SPECIAL false + +DEFINE_VIML_CONV_FUNCTIONS(static, string, garray_T *const, gap) + +#undef CONV_RECURSE +#define CONV_RECURSE(val, conv_type) \ + do { \ + char ebuf[NUMBUFLEN + 7]; \ + size_t backref = 0; \ + for (; backref < kv_size(*mpstack); backref++) { \ + const MPConvStackVal mpval = kv_a(MPConvStackVal, *mpstack, backref); \ + if (mpval.type == conv_type) { \ + if (conv_type == kMPConvDict) { \ + if ((void *) mpval.data.d.dict == (void *) val) { \ + break; \ + } \ + } else if (conv_type == kMPConvList) { \ + if ((void *) mpval.data.l.list == 
(void *) val) { \ + break; \ + } \ + } \ + } \ + } \ + if (conv_type == kMPConvDict) { \ + vim_snprintf(ebuf, NUMBUFLEN + 6, "{...@%zu}", backref); \ + } else { \ + vim_snprintf(ebuf, NUMBUFLEN + 6, "[...@%zu]", backref); \ + } \ + ga_concat(gap, &ebuf[0]); \ + return OK; \ + } while (0) + +DEFINE_VIML_CONV_FUNCTIONS(, echo, garray_T *const, gap) + +#undef CONV_RECURSE +#define CONV_RECURSE(val, conv_type) \ + do { \ + if (!did_echo_string_emsg) { \ + /* Only give this message once for a recursive call to avoid */ \ + /* flooding the user with errors. */ \ + did_echo_string_emsg = true; \ + EMSG(_("E724: unable to correctly dump variable " \ + "with self-referencing container")); \ + } \ + return OK; \ + } while (0) + +#undef CONV_ALLOW_SPECIAL +#define CONV_ALLOW_SPECIAL true + +#undef CONV_NIL +#define CONV_NIL() \ + ga_concat(gap, "null") + +#undef CONV_BOOL +#define CONV_BOOL(num) \ + ga_concat(gap, ((num)? "true": "false")) + +#undef CONV_UNSIGNED_NUMBER +#define CONV_UNSIGNED_NUMBER(num) \ + do { \ + char numbuf[NUMBUFLEN]; \ + vim_snprintf(numbuf, sizeof(numbuf), "%" PRIu64, (num)); \ + ga_concat(gap, numbuf); \ + } while (0) + +#undef CONV_FLOAT +#define CONV_FLOAT(flt) \ + do { \ + char numbuf[NUMBUFLEN]; \ + vim_snprintf(numbuf, NUMBUFLEN - 1, "%g", (flt)); \ + ga_concat(gap, numbuf); \ + } while (0) + +/// Last used p_enc value +/// +/// Generic pointer: it is not used as a string, only pointer comparisons are +/// performed. Must not be freed. +static const void *last_p_enc = NULL; + +/// Conversion setup for converting from last_p_enc to UTF-8 +static vimconv_T p_enc_conv = { + .vc_type = CONV_NONE, +}; + +/// Escape sequences used in JSON +static const char escapes[][3] = { + [BS] = "\\b", + [TAB] = "\\t", + [NL] = "\\n", + [CAR] = "\\r", + ['"'] = "\\\"", + ['\\'] = "\\\\", +}; + +static const char xdigits[] = "0123456789ABCDEF"; + +/// Convert given string to JSON string +/// +/// @param[out] gap Garray where result will be saved. 
+/// @param[in] buf Converted string. +/// @param[in] len Converted string length. +/// +/// @return OK in case of success, FAIL otherwise. +static inline int convert_to_json_string(garray_T *const gap, + const char *const buf, + const size_t len) + FUNC_ATTR_NONNULL_ARG(1) FUNC_ATTR_ALWAYS_INLINE +{ + const char *buf_ = buf; + if (buf_ == NULL) { + ga_concat(gap, "\"\""); + } else { + size_t len_ = len; + char *tofree = NULL; + if (last_p_enc != (const void *) p_enc) { + convert_setup(&p_enc_conv, p_enc, "utf-8"); + p_enc_conv.vc_fail = true; + last_p_enc = p_enc; + } + if (p_enc_conv.vc_type != CONV_NONE) { + tofree = string_convert(&p_enc_conv, buf_, &len_); + if (tofree == NULL) { + EMSG2(_("E474: Failed to convert string \"%s\" to UTF-8"), buf_); + return FAIL; + } + buf_ = tofree; + } + size_t str_len = 0; + for (size_t i = 0; i < len_;) { + const int ch = utf_ptr2char(buf + i); + const size_t shift = (ch == 0? 1: utf_ptr2len(buf + i)); + assert(shift > 0); + i += shift; + switch (ch) { + case BS: + case TAB: + case NL: + case FF: + case CAR: + case '"': + case '\\': { + str_len += 2; + break; + } + default: { + if (ch > 0x7F && shift == 1) { + EMSG2(_("E474: String \"%s\" contains byte that does not start any " + "UTF-8 character"), buf_); + return FAIL; + } else if ((0xD800 <= ch && ch <= 0xDB7F) + || (0xDC00 <= ch && ch <= 0xDFFF)) { + EMSG2(_("E474: UTF-8 string contains code point which belongs " + "to surrogate pairs"), buf_); + return FAIL; + } else if (vim_isprintc(ch)) { + str_len += shift; + } else { + str_len += ((sizeof("\\u1234") - 1) * (1 + (ch > 0xFFFF))); + } + break; + } + } + } + ga_append(gap, '"'); + ga_grow(gap, (int) str_len); + for (size_t i = 0; i < len_;) { + const int ch = utf_ptr2char(buf + i); + const size_t shift = (ch == 0? 1: utf_char2len(ch)); + assert(shift > 0); + // Is false on invalid unicode, but this should already be handled. 
+ assert(ch == 0 || shift == utf_ptr2len(buf + i)); + switch (ch) { + case BS: + case TAB: + case NL: + case FF: + case CAR: + case '"': + case '\\': { + ga_concat_len(gap, escapes[ch], 2); + break; + } + default: { + if (vim_isprintc(ch)) { + ga_concat_len(gap, buf + i, shift); + } else if (ch <= 0xFFFF) { + ga_concat_len(gap, ((const char []) { + '\\', 'u', + xdigits[(ch >> (4 * 3)) & 0xF], + xdigits[(ch >> (4 * 2)) & 0xF], + xdigits[(ch >> (4 * 1)) & 0xF], + xdigits[(ch >> (4 * 0)) & 0xF], + }), sizeof("\\u1234") - 1); + } else { + uint32_t tmp = (uint32_t) ch - 0x010000; + uint16_t hi = 0xD800 + ((tmp >> 10) & 0x03FF); + uint16_t lo = 0xDC00 + ((tmp >> 0) & 0x03FF); + ga_concat_len(gap, ((const char []) { + '\\', 'u', + xdigits[(hi >> (4 * 3)) & 0xF], + xdigits[(hi >> (4 * 2)) & 0xF], + xdigits[(hi >> (4 * 1)) & 0xF], + xdigits[(hi >> (4 * 0)) & 0xF], + '\\', 'u', + xdigits[(lo >> (4 * 3)) & 0xF], + xdigits[(lo >> (4 * 2)) & 0xF], + xdigits[(lo >> (4 * 1)) & 0xF], + xdigits[(lo >> (4 * 0)) & 0xF], + }), (sizeof("\\u1234") - 1) * 2); + } + break; + } + } + i += shift; + } + ga_append(gap, '"'); + xfree(tofree); + } + return OK; +} + +#undef CONV_STRING +#define CONV_STRING(buf, len) \ + do { \ + if (convert_to_json_string(gap, (const char *) (buf), (len)) != OK) { \ + return FAIL; \ + } \ + } while (0) + +#undef CONV_EXT_STRING +#define CONV_EXT_STRING(buf, len, type) \ + do { \ + xfree(buf); \ + EMSG(_("E474: Unable to convert EXT string to JSON")); \ + return FAIL; \ + } while (0) + +#undef CONV_FUNC +#define CONV_FUNC(fun) \ + return conv_error(_("E474: Error while dumping %s, %s: " \ + "attempt to dump function reference"), \ + mpstack, objname) + +/// Check whether given key can be used in jsonencode() +/// +/// @param[in] tv Key to check. 
+static inline bool check_json_key(const typval_T *const tv) + FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_PURE + FUNC_ATTR_ALWAYS_INLINE +{ + if (tv->v_type == VAR_STRING) { + return true; + } + if (tv->v_type != VAR_DICT) { + return false; + } + const dict_T *const spdict = tv->vval.v_dict; + if (spdict->dv_hashtab.ht_used != 2) { + return false; + } + const dictitem_T *type_di; + const dictitem_T *val_di; + if ((type_di = dict_find((dict_T *) spdict, (char_u *) "_TYPE", -1)) == NULL + || type_di->di_tv.v_type != VAR_LIST + || (type_di->di_tv.vval.v_list != eval_msgpack_type_lists[kMPString] + && type_di->di_tv.vval.v_list != eval_msgpack_type_lists[kMPBinary]) + || (val_di = dict_find((dict_T *) spdict, (char_u *) "_VAL", -1)) == NULL + || val_di->di_tv.v_type != VAR_LIST) { + return false; + } + if (val_di->di_tv.vval.v_list == NULL) { + return true; + } + for (const listitem_T *li = val_di->di_tv.vval.v_list->lv_first; + li != NULL; li = li->li_next) { + if (li->li_tv.v_type != VAR_STRING) { + return false; + } + } + return true; +} + +#undef CONV_SPECIAL_DICT_KEY_CHECK +#define CONV_SPECIAL_DICT_KEY_CHECK(kv_pair) \ + do { \ + if (!check_json_key(&kv_pair->lv_first->li_tv)) { \ + EMSG(_("E474: Invalid key in special dictionary")); \ + return FAIL; \ + } \ + } while (0) + +#undef CONV_NONE_VAL +#define CONV_NONE_VAL() + +DEFINE_VIML_CONV_FUNCTIONS(static, json, garray_T *const, gap) + +#undef CONV_STRING +#undef CONV_STR_STRING +#undef CONV_EXT_STRING +#undef CONV_NUMBER +#undef CONV_FLOAT +#undef CONV_FUNC +#undef CONV_EMPTY_LIST +#undef CONV_LIST_START +#undef CONV_EMPTY_DICT +#undef CONV_NIL +#undef CONV_BOOL +#undef CONV_NONE_VAL +#undef CONV_UNSIGNED_NUMBER +#undef CONV_DICT_START +#undef CONV_DICT_END +#undef CONV_DICT_AFTER_KEY +#undef CONV_DICT_BETWEEN_ITEMS +#undef CONV_SPECIAL_DICT_KEY_CHECK +#undef CONV_LIST_END +#undef CONV_LIST_BETWEEN_ITEMS +#undef CONV_RECURSE +#undef CONV_ALLOW_SPECIAL + +/// Return a string with the string 
representation of a variable. +/// Puts quotes around strings, so that they can be parsed back by eval(). +/// +/// @param[in] tv typval_T to convert. +/// @param[out] len Location where length of the result will be saved. +/// +/// @return String representation of the variable or NULL. +char *encode_tv2string(typval_T *tv, size_t *len) + FUNC_ATTR_NONNULL_ARG(1) FUNC_ATTR_MALLOC +{ + garray_T ga; + ga_init(&ga, (int)sizeof(char), 80); + encode_vim_to_string(&ga, tv, "encode_tv2string() argument"); + did_echo_string_emsg = false; + if (len != NULL) { + *len = (size_t) ga.ga_len; + } + ga_append(&ga, '\0'); + return (char *) ga.ga_data; +} + +/// Return a string with the string representation of a variable. +/// Does not put quotes around strings, as ":echo" displays values. +/// +/// @param[in] tv typval_T to convert. +/// @param[out] len Location where length of the result will be saved. +/// +/// @return String representation of the variable or NULL. +char *encode_tv2echo(typval_T *tv, size_t *len) + FUNC_ATTR_NONNULL_ARG(1) FUNC_ATTR_MALLOC +{ + garray_T ga; + ga_init(&ga, (int)sizeof(char), 80); + if (tv->v_type == VAR_STRING || tv->v_type == VAR_FUNC) { + if (tv->vval.v_string != NULL) { + ga_concat(&ga, tv->vval.v_string); + } + } else { + encode_vim_to_echo(&ga, tv, ":echo argument"); + } + if (len != NULL) { + *len = (size_t) ga.ga_len; + } + ga_append(&ga, '\0'); + return (char *) ga.ga_data; +} + +/// Return a string with the string representation of a variable. +/// Puts quotes around strings, so that they can be parsed back by eval(). +/// +/// @param[in] tv typval_T to convert. +/// @param[out] len Location where length of the result will be saved. +/// +/// @return String representation of the variable or NULL. 
+char *encode_tv2json(typval_T *tv, size_t *len) + FUNC_ATTR_NONNULL_ARG(1) FUNC_ATTR_MALLOC +{ + garray_T ga; + ga_init(&ga, (int)sizeof(char), 80); + encode_vim_to_json(&ga, tv, "encode_tv2json() argument"); + did_echo_string_emsg = false; + if (len != NULL) { + *len = (size_t) ga.ga_len; + } + ga_append(&ga, '\0'); + return (char *) ga.ga_data; +} + +#define CONV_STRING(buf, len) \ + do { \ + if (buf == NULL) { \ + msgpack_pack_bin(packer, 0); \ + } else { \ + const size_t len_ = (len); \ + msgpack_pack_bin(packer, len_); \ + msgpack_pack_bin_body(packer, buf, len_); \ + } \ + } while (0) + +#define CONV_STR_STRING(buf, len) \ + do { \ + if (buf == NULL) { \ + msgpack_pack_str(packer, 0); \ + } else { \ + const size_t len_ = (len); \ + msgpack_pack_str(packer, len_); \ + msgpack_pack_str_body(packer, buf, len_); \ + } \ + } while (0) + +#define CONV_EXT_STRING(buf, len, type) \ + do { \ + if (buf == NULL) { \ + msgpack_pack_ext(packer, 0, (int8_t) type); \ + } else { \ + const size_t len_ = (len); \ + msgpack_pack_ext(packer, len_, (int8_t) type); \ + msgpack_pack_ext_body(packer, buf, len_); \ + } \ + } while (0) + +#define CONV_NUMBER(num) \ + msgpack_pack_int64(packer, (int64_t) (num)) + +#define CONV_FLOAT(flt) \ + msgpack_pack_double(packer, (double) (flt)) + +#define CONV_FUNC(fun) \ + return conv_error(_("E951: Error while dumping %s, %s: " \ + "attempt to dump function reference"), \ + mpstack, objname) + +#define CONV_EMPTY_LIST() \ + msgpack_pack_array(packer, 0) + +#define CONV_LIST_START(lst) \ + msgpack_pack_array(packer, (size_t) (lst)->lv_len) + +#define CONV_EMPTY_DICT() \ + msgpack_pack_map(packer, 0) + +#define CONV_NIL() \ + msgpack_pack_nil(packer) + +#define CONV_NONE_VAL() \ + return conv_error(_("E953: Attempt to convert v:none in %s, %s"), \ + mpstack, objname) + +#define CONV_BOOL(num) \ + do { \ + if ((num)) { \ + msgpack_pack_true(packer); \ + } else { \ + msgpack_pack_false(packer); \ + } \ + } while (0) + +#define 
CONV_UNSIGNED_NUMBER(num) \ + msgpack_pack_uint64(packer, (num)) + +#define CONV_DICT_START(len) \ + msgpack_pack_map(packer, (size_t) (len)) + +#define CONV_DICT_END() + +#define CONV_DICT_AFTER_KEY() + +#define CONV_DICT_BETWEEN_ITEMS() + +#define CONV_SPECIAL_DICT_KEY_CHECK(kv_pair) + +#define CONV_LIST_END(lst) + +#define CONV_LIST_BETWEEN_ITEMS() + +#define CONV_RECURSE(val, conv_type) \ + return conv_error(_("E952: Unable to dump %s: " \ + "container references itself in %s"), \ + mpstack, objname) + +#define CONV_ALLOW_SPECIAL true + +DEFINE_VIML_CONV_FUNCTIONS(, msgpack, msgpack_packer *const, packer) + +#undef CONV_STRING +#undef CONV_STR_STRING +#undef CONV_EXT_STRING +#undef CONV_NUMBER +#undef CONV_FLOAT +#undef CONV_FUNC +#undef CONV_EMPTY_LIST +#undef CONV_LIST_START +#undef CONV_EMPTY_DICT +#undef CONV_NIL +#undef CONV_BOOL +#undef CONV_NONE_VAL +#undef CONV_UNSIGNED_NUMBER +#undef CONV_DICT_START +#undef CONV_DICT_END +#undef CONV_DICT_AFTER_KEY +#undef CONV_DICT_BETWEEN_ITEMS +#undef CONV_SPECIAL_DICT_KEY_CHECK +#undef CONV_LIST_END +#undef CONV_LIST_BETWEEN_ITEMS +#undef CONV_RECURSE +#undef CONV_ALLOW_SPECIAL diff --git a/src/nvim/eval/encode.h b/src/nvim/eval/encode.h new file mode 100644 index 0000000000..0e60c96155 --- /dev/null +++ b/src/nvim/eval/encode.h @@ -0,0 +1,60 @@ +#ifndef NVIM_EVAL_ENCODE_H +#define NVIM_EVAL_ENCODE_H + +#include + +#include + +#include "nvim/eval.h" +#include "nvim/garray.h" +#include "nvim/vim.h" // For STRLEN + +/// Convert VimL value to msgpack string +/// +/// @param[out] packer Packer to save results in. +/// @param[in] tv Dumped value. +/// @param[in] objname Object name, used for error message. +/// +/// @return OK in case of success, FAIL otherwise. +int encode_vim_to_msgpack(msgpack_packer *const packer, + typval_T *const tv, + const char *const objname); + +/// Convert VimL value to :echo output +/// +/// @param[out] packer Packer to save results in. +/// @param[in] tv Dumped value. 
+/// @param[in] objname Object name, used for error message. +/// +/// @return OK in case of success, FAIL otherwise. +int encode_vim_to_echo(garray_T *const packer, + typval_T *const tv, + const char *const objname); + +/// Structure defining state for read_from_list() +typedef struct { + const listitem_T *li; ///< Item currently read. + size_t offset; ///< Byte offset inside the read item. + size_t li_length; ///< Length of the string inside the read item. +} ListReaderState; + +/// Initialize ListReaderState structure +static inline ListReaderState encode_init_lrstate(const list_T *const list) + FUNC_ATTR_NONNULL_ALL +{ + return (ListReaderState) { + .li = list->lv_first, + .offset = 0, + .li_length = (list->lv_first->li_tv.vval.v_string == NULL + ? 0 + : STRLEN(list->lv_first->li_tv.vval.v_string)), + }; +} + +/// Array mapping values from SpecialVarValue enum to names +extern const char *const encode_special_var_names[]; + +#ifdef INCLUDE_GENERATED_DECLARATIONS +# include "eval/encode.h.generated.h" +#endif +#endif // NVIM_EVAL_ENCODE_H -- cgit From 700b32a2b3af859299cbe92914f1a4cd800de724 Mon Sep 17 00:00:00 2001 From: ZyX Date: Wed, 3 Feb 2016 19:43:48 +0300 Subject: eval: Move some decoding functions to eval/decode.c --- src/nvim/eval/decode.c | 753 +++++++++++++++++++++++++++++++++++++++++++++++++ src/nvim/eval/decode.h | 13 + src/nvim/eval/encode.c | 2 +- 3 files changed, 767 insertions(+), 1 deletion(-) create mode 100644 src/nvim/eval/decode.c create mode 100644 src/nvim/eval/decode.h (limited to 'src/nvim/eval') diff --git a/src/nvim/eval/decode.c b/src/nvim/eval/decode.c new file mode 100644 index 0000000000..e2b2574d83 --- /dev/null +++ b/src/nvim/eval/decode.c @@ -0,0 +1,753 @@ +#include + +#include + +#include "nvim/eval_defs.h" +#include "nvim/eval.h" +#include "nvim/eval/encode.h" +#include "nvim/ascii.h" +#include "nvim/message.h" +#include "nvim/charset.h" // vim_str2nr +#include "nvim/lib/kvec.h" +#include "nvim/vim.h" // OK, FAIL + +/// 
Helper structure for container_struct +typedef struct { + size_t stack_index; ///< Index of current container in stack. + typval_T container; ///< Container. Either VAR_LIST, VAR_DICT or VAR_LIST + ///< which is _VAL from special dictionary. +} ContainerStackItem; + +typedef kvec_t(typval_T) ValuesStack; +typedef kvec_t(ContainerStackItem) ContainerStack; + +#ifdef INCLUDE_GENERATED_DECLARATIONS +# include "eval/decode.c.generated.h" +#endif + +/// Helper function used for working with stack vectors used by JSON decoder +/// +/// @param[in] obj New object. +/// @param[out] stack Object stack. +/// @param[out] container_stack Container objects stack. +/// @param[in] p Position in string which is currently being parsed. +/// +/// @return OK in case of success, FAIL in case of error. +static inline int json_decoder_pop(typval_T obj, ValuesStack *const stack, + ContainerStack *const container_stack, + const char *const p) + FUNC_ATTR_NONNULL_ALL +{ + if (kv_size(*container_stack) == 0) { + kv_push(typval_T, *stack, obj); + return OK; + } + ContainerStackItem last_container = kv_last(*container_stack); + if (obj.v_type == last_container.container.v_type + // vval.v_list and vval.v_dict should have the same size and offset + && ((void *) obj.vval.v_list + == (void *) last_container.container.vval.v_list)) { + kv_pop(*container_stack); + last_container = kv_last(*container_stack); + } + if (last_container.container.v_type == VAR_LIST) { + listitem_T *obj_li = listitem_alloc(); + obj_li->li_tv = obj; + list_append(last_container.container.vval.v_list, obj_li); + } else if (last_container.stack_index == kv_size(*stack) - 2) { + typval_T key = kv_pop(*stack); + if (key.v_type != VAR_STRING) { + assert(false); + } else if (key.vval.v_string == NULL || *key.vval.v_string == NUL) { + // TODO: fall back to special dict in case of empty key + EMSG(_("E474: Empty key")); + clear_tv(&obj); + return FAIL; + } + dictitem_T *obj_di = dictitem_alloc(key.vval.v_string); + 
clear_tv(&key); + if (dict_add(last_container.container.vval.v_dict, obj_di) + == FAIL) { + // TODO: fall back to special dict in case of duplicate keys + EMSG(_("E474: Duplicate key")); + dictitem_free(obj_di); + clear_tv(&obj); + return FAIL; + } + obj_di->di_tv = obj; + } else { + // Object with key only + if (obj.v_type != VAR_STRING) { + EMSG2(_("E474: Expected string key: %s"), p); + clear_tv(&obj); + return FAIL; + } + kv_push(typval_T, *stack, obj); + } + return OK; +} + +/// Convert JSON string into VimL object +/// +/// @param[in] buf String to convert. UTF-8 encoding is assumed. +/// @param[in] len Length of the string. +/// @param[out] rettv Location where to save results. +/// +/// @return OK in case of success, FAIL otherwise. +int json_decode_string(const char *const buf, const size_t len, + typval_T *const rettv) + FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT +{ + vimconv_T conv; + convert_setup(&conv, (char_u *) "utf-8", p_enc); + conv.vc_fail = true; + int ret = OK; + ValuesStack stack; + kv_init(stack); + ContainerStack container_stack; + kv_init(container_stack); + rettv->v_type = VAR_UNKNOWN; + const char *const e = buf + len; + bool didcomma = false; + bool didcolon = false; +#define POP(obj) \ + do { \ + if (json_decoder_pop(obj, &stack, &container_stack, p) == FAIL) { \ + goto json_decode_string_fail; \ + } \ + } while (0) + const char *p = buf; + for (; p < e; p++) { + switch (*p) { + case '}': + case ']': { + if (kv_size(container_stack) == 0) { + EMSG2(_("E474: No container to close: %s"), p); + goto json_decode_string_fail; + } + ContainerStackItem last_container = kv_last(container_stack); + if (*p == '}' && last_container.container.v_type != VAR_DICT) { + EMSG2(_("E474: Closing list with figure brace: %s"), p); + goto json_decode_string_fail; + } else if (*p == ']' && last_container.container.v_type != VAR_LIST) { + EMSG2(_("E474: Closing dictionary with bracket: %s"), p); + goto json_decode_string_fail; + } else if (didcomma) { 
+ EMSG2(_("E474: Trailing comma: %s"), p); + goto json_decode_string_fail; + } else if (didcolon) { + EMSG2(_("E474: Expected value after colon: %s"), p); + goto json_decode_string_fail; + } else if (last_container.stack_index != kv_size(stack) - 1) { + assert(last_container.stack_index < kv_size(stack) - 1); + EMSG2(_("E474: Expected value: %s"), p); + goto json_decode_string_fail; + } + if (kv_size(stack) == 1) { + p++; + kv_pop(container_stack); + goto json_decode_string_after_cycle; + } else { + typval_T obj = kv_pop(stack); + POP(obj); + break; + } + } + case ',': { + if (kv_size(container_stack) == 0) { + EMSG2(_("E474: Comma not inside container: %s"), p); + goto json_decode_string_fail; + } + ContainerStackItem last_container = kv_last(container_stack); + if (didcomma) { + EMSG2(_("E474: Duplicate comma: %s"), p); + goto json_decode_string_fail; + } else if (didcolon) { + EMSG2(_("E474: Comma after colon: %s"), p); + goto json_decode_string_fail; + } if (last_container.container.v_type == VAR_DICT + && last_container.stack_index != kv_size(stack) - 1) { + EMSG2(_("E474: Using comma in place of colon: %s"), p); + goto json_decode_string_fail; + } else if ((last_container.container.v_type == VAR_DICT + && (last_container.container.vval.v_dict->dv_hashtab.ht_used + == 0)) + || (last_container.container.v_type == VAR_LIST + && last_container.container.vval.v_list->lv_len == 0)) { + EMSG2(_("E474: Leading comma: %s"), p); + goto json_decode_string_fail; + } + didcomma = true; + continue; + } + case ':': { + if (kv_size(container_stack) == 0) { + EMSG2(_("E474: Colon not inside container: %s"), p); + goto json_decode_string_fail; + } + ContainerStackItem last_container = kv_last(container_stack); + if (last_container.container.v_type != VAR_DICT) { + EMSG2(_("E474: Using colon not in dictionary: %s"), p); + goto json_decode_string_fail; + } else if (last_container.stack_index != kv_size(stack) - 2) { + EMSG2(_("E474: Unexpected colon: %s"), p); + goto 
json_decode_string_fail; + } else if (didcomma) { + EMSG2(_("E474: Colon after comma: %s"), p); + goto json_decode_string_fail; + } else if (didcolon) { + EMSG2(_("E474: Duplicate colon: %s"), p); + goto json_decode_string_fail; + } + didcolon = true; + continue; + } + case ' ': + case TAB: + case NL: { + continue; + } + case 'n': { + if (strncmp(p + 1, "ull", 3) != 0) { + EMSG2(_("E474: Expected null: %s"), p); + goto json_decode_string_fail; + } + p += 3; + POP(get_vim_var_tv(VV_NULL)); + break; + } + case 't': { + if (strncmp(p + 1, "rue", 3) != 0) { + EMSG2(_("E474: Expected true: %s"), p); + goto json_decode_string_fail; + } + p += 3; + POP(get_vim_var_tv(VV_TRUE)); + break; + } + case 'f': { + if (strncmp(p + 1, "alse", 4) != 0) { + EMSG2(_("E474: Expected false: %s"), p); + goto json_decode_string_fail; + } + p += 4; + POP(get_vim_var_tv(VV_FALSE)); + break; + } + case '"': { + size_t len = 0; + const char *s; + for (s = ++p; p < e && *p != '"'; p++) { + if (*p == '\\') { + p++; + if (p == e) { + EMSG2(_("E474: Unfinished escape sequence: %s"), buf); + goto json_decode_string_fail; + } + switch (*p) { + case 'u': { + if (p + 4 >= e) { + EMSG2(_("E474: Unfinished unicode escape sequence: %s"), buf); + goto json_decode_string_fail; + } else if (!ascii_isxdigit(p[1]) + || !ascii_isxdigit(p[2]) + || !ascii_isxdigit(p[3]) + || !ascii_isxdigit(p[4])) { + EMSG2(_("E474: Expected four hex digits after \\u: %s"), + p - 1); + goto json_decode_string_fail; + } + // One UTF-8 character below U+10000 can take up to 3 bytes + len += 3; + p += 4; + break; + } + case '\\': + case '/': + case '"': + case 't': + case 'b': + case 'n': + case 'r': + case 'f': { + len++; + break; + } + default: { + EMSG2(_("E474: Unknown escape sequence: %s"), p - 1); + goto json_decode_string_fail; + } + } + } else { + len++; + } + } + if (*p != '"') { + EMSG2(_("E474: Expected string end: %s"), buf); + goto json_decode_string_fail; + } + char *str = xmalloc(len + 1); + uint16_t fst_in_pair = 
0;
+        char *str_end = str;
+        // Second pass over the (already validated) literal: unescape into
+        // str. fst_in_pair holds a high surrogate from a \uXXXX escape that
+        // is still waiting for its low half.
+        for (const char *t = s; t < p; t++) {
+          if (t[0] != '\\' || t[1] != 'u') {
+            // Anything but another \u escape ends the pair: emit the lone
+            // high surrogate as is.
+            if (fst_in_pair != 0) {
+              str_end += utf_char2bytes((int) fst_in_pair, (char_u *) str_end);
+              fst_in_pair = 0;
+            }
+          }
+          if (*t == '\\') {
+            t++;
+            switch (*t) {
+              case 'u': {
+                char ubuf[] = { t[1], t[2], t[3], t[4], 0 };
+                t += 4;
+                unsigned long ch;
+                vim_str2nr((char_u *) ubuf, NULL, NULL, 0, 0, 2, NULL, &ch);
+                if (0xD800UL <= ch && ch <= 0xDBFFUL) {
+                  // High (leading) surrogate, full range U+D800..U+DBFF.
+                  // A previous unpaired one must not be lost.
+                  if (fst_in_pair != 0) {
+                    str_end += utf_char2bytes((int) fst_in_pair,
+                                              (char_u *) str_end);
+                  }
+                  fst_in_pair = (uint16_t) ch;
+                } else if (0xDC00UL <= ch && ch <= 0xDFFFUL
+                           && fst_in_pair != 0) {
+                  // Low (trailing) surrogate completing a pair: combine both
+                  // halves into one code point above U+FFFF. The result needs
+                  // 4 bytes, which fits the 2 * 3 bytes reserved for the two
+                  // escapes in the first pass.
+                  const int full_char = (
+                      (int) (ch - 0xDC00UL)
+                      + (((int) (fst_in_pair - 0xD800)) << 10)
+                      + 0x10000
+                  );
+                  str_end += utf_char2bytes(full_char, (char_u *) str_end);
+                  fst_in_pair = 0;
+                } else {
+                  // Ordinary BMP character (or an unpaired low surrogate):
+                  // flush a pending high surrogate first to keep the order.
+                  if (fst_in_pair != 0) {
+                    str_end += utf_char2bytes((int) fst_in_pair,
+                                              (char_u *) str_end);
+                    fst_in_pair = 0;
+                  }
+                  str_end += utf_char2bytes((int) ch, (char_u *) str_end);
+                }
+                break;
+              }
+              case '\\':
+              case '/':
+              case '"':
+              case 't':
+              case 'b':
+              case 'n':
+              case 'r':
+              case 'f': {
+                static const char escapes[] = {
+                  ['\\'] = '\\',
+                  ['/'] = '/',
+                  ['"'] = '"',
+                  ['t'] = TAB,
+                  ['b'] = BS,
+                  ['n'] = NL,
+                  ['r'] = CAR,
+                  ['f'] = FF,
+                };
+                *str_end++ = escapes[(int) *t];
+                break;
+              }
+              default: {
+                // Unknown escapes were rejected by the first pass.
+                assert(false);
+              }
+            }
+          } else {
+            *str_end++ = *t;
+          }
+        }
+        if (fst_in_pair != 0) {
+          str_end += utf_char2bytes((int) fst_in_pair, (char_u *) str_end);
+        }
+        // Terminate before conversion: the error message below prints str
+        // with %s. The buffer was allocated with one spare byte for this.
+        *str_end = NUL;
+        if (conv.vc_type != CONV_NONE) {
+          size_t len = (size_t) (str_end - str);
+          char *const new_str = (char *) string_convert(&conv, (char_u *) str,
+                                                        &len);
+          if (new_str == NULL) {
+            EMSG2(_("E474: Failed to convert string \"%s\" from UTF-8"), str);
+            xfree(str);
+            goto json_decode_string_fail;
+          }
+          xfree(str);
+          str = new_str;
+          str_end = new_str + len;
+        }
+        *str_end = NUL;
+        // TODO: return special string in case of NUL bytes
+        POP(((typval_T) {
+          .v_type = VAR_STRING,
+          .vval = { .v_string = (char_u *) str, },
+        }));
+        break;
+      }
+      case '-':
+      case '0':
+      case '1':
+      case '2':
+      case '3':
+      case '4':
+      case '5':
+      case '6':
+      case '7':
+      case '8':
+      case '9': {
+        // a.bE[+-]exp
+        const char 
*const s = p; + const char *ints = NULL; + const char *fracs = NULL; + const char *exps = NULL; + if (*p == '-') { + p++; + } + ints = p; + while (p < e && ascii_isdigit(*p)) { + p++; + } + if (p < e && *p == '.') { + p++; + fracs = p; + while (p < e && ascii_isdigit(*p)) { + p++; + } + if (p < e && (*p == 'e' || *p == 'E')) { + p++; + if (p < e && (*p == '-' || *p == '+')) { + p++; + } + exps = p; + while (p < e && ascii_isdigit(*p)) { + p++; + } + } + } + if (p == ints) { + EMSG2(_("E474: Missing number after minus sign: %s"), s); + goto json_decode_string_fail; + } else if (p == fracs) { + EMSG2(_("E474: Missing number after decimal dot: %s"), s); + goto json_decode_string_fail; + } else if (p == exps) { + EMSG2(_("E474: Missing exponent: %s"), s); + goto json_decode_string_fail; + } + typval_T tv = { + .v_type = VAR_NUMBER, + .v_lock = VAR_UNLOCKED, + }; + if (fracs) { + // Convert floating-point number + (void) string2float(s, &tv.vval.v_float); + tv.v_type = VAR_FLOAT; + } else { + // Convert integer + long nr; + vim_str2nr((char_u *) s, NULL, NULL, 0, 0, 0, &nr, NULL); + tv.vval.v_number = (varnumber_T) nr; + } + POP(tv); + p--; + break; + } + case '[': { + list_T *list = list_alloc(); + list->lv_refcount++; + typval_T tv = { + .v_type = VAR_LIST, + .v_lock = VAR_UNLOCKED, + .vval = { .v_list = list }, + }; + kv_push(ContainerStackItem, container_stack, ((ContainerStackItem) { + .stack_index = kv_size(stack), + .container = tv, + })); + kv_push(typval_T, stack, tv); + break; + } + case '{': { + dict_T *dict = dict_alloc(); + dict->dv_refcount++; + typval_T tv = { + .v_type = VAR_DICT, + .v_lock = VAR_UNLOCKED, + .vval = { .v_dict = dict }, + }; + kv_push(ContainerStackItem, container_stack, ((ContainerStackItem) { + .stack_index = kv_size(stack), + .container = tv, + })); + kv_push(typval_T, stack, tv); + break; + } + default: { + EMSG2(_("E474: Unidentified byte: %s"), p); + goto json_decode_string_fail; + } + } + didcomma = false; + didcolon = false; + if 
(kv_size(container_stack) == 0) {
+      // Top-level value is complete: only trailing whitespace may follow.
+      p++;
+      break;
+    }
+  }
+#undef POP
+json_decode_string_after_cycle:
+  for (; p < e; p++) {
+    switch (*p) {
+      case NL:
+      case ' ':
+      case TAB: {
+        break;
+      }
+      default: {
+        EMSG2(_("E474: Trailing characters: %s"), p);
+        goto json_decode_string_fail;
+      }
+    }
+  }
+  // Empty or whitespace-only input never pushes a value; without this check
+  // the success path below would pop from an empty stack.
+  if (kv_size(stack) == 0) {
+    EMSG(_("E474: Attempt to decode a blank string"));
+    goto json_decode_string_fail;
+  }
+  if (kv_size(stack) > 1 || kv_size(container_stack)) {
+    EMSG2(_("E474: Unexpected end of input: %s"), buf);
+    goto json_decode_string_fail;
+  }
+  goto json_decode_string_ret;
+json_decode_string_fail:
+  ret = FAIL;
+  // Release every value that was parsed but not yet attached to a container.
+  while (kv_size(stack)) {
+    clear_tv(&kv_pop(stack));
+  }
+json_decode_string_ret:
+  if (ret != FAIL) {
+    // Exactly one value, the decoded top-level object, is left.
+    assert(kv_size(stack) == 1);
+    *rettv = kv_pop(stack);
+  }
+  kv_destroy(stack);
+  kv_destroy(container_stack);
+  return ret;
+}
+
+/// Convert msgpack object to a VimL one
+int msgpack_to_vim(const msgpack_object mobj, typval_T *const rettv)
+  FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT
+{
+#define INIT_SPECIAL_DICT(tv, type, val) \
+  do { \
+    dict_T *const dict = dict_alloc(); \
+    dictitem_T *const type_di = dictitem_alloc((char_u *) "_TYPE"); \
+    type_di->di_tv.v_type = VAR_LIST; \
+    type_di->di_tv.v_lock = 0; \
+    type_di->di_tv.vval.v_list = (list_T *) eval_msgpack_type_lists[type]; \
+    type_di->di_tv.vval.v_list->lv_refcount++; \
+    dict_add(dict, type_di); \
+    dictitem_T *const val_di = dictitem_alloc((char_u *) "_VAL"); \
+    val_di->di_tv = val; \
+    dict_add(dict, val_di); \
+    tv->v_type = VAR_DICT; \
+    dict->dv_refcount++; \
+    tv->vval.v_dict = dict; \
+  } while (0)
+  switch (mobj.type) {
+    case MSGPACK_OBJECT_NIL: {
+      INIT_SPECIAL_DICT(rettv, kMPNil, ((typval_T) {
+        .v_type = VAR_NUMBER,
+        .v_lock = 0,
+        .vval = { .v_number = 0 },
+      }));
+      break;
+    }
+    case MSGPACK_OBJECT_BOOLEAN: {
+      INIT_SPECIAL_DICT(rettv, kMPBoolean,
+                        ((typval_T) {
+                           .v_type = VAR_NUMBER,
+                           .v_lock = 0,
+                           .vval = {
+                             .v_number = (varnumber_T) mobj.via.boolean,
+                           },
+                         }));
+      break;
+    }
+    case MSGPACK_OBJECT_POSITIVE_INTEGER: {
+      if (mobj.via.u64 <= VARNUMBER_MAX) {
+ 
*rettv = (typval_T) { + .v_type = VAR_NUMBER, + .v_lock = 0, + .vval = { .v_number = (varnumber_T) mobj.via.u64 }, + }; + } else { + list_T *const list = list_alloc(); + list->lv_refcount++; + INIT_SPECIAL_DICT(rettv, kMPInteger, + ((typval_T) { + .v_type = VAR_LIST, + .v_lock = 0, + .vval = { .v_list = list }, + })); + uint64_t n = mobj.via.u64; + list_append_number(list, 1); + list_append_number(list, (varnumber_T) ((n >> 62) & 0x3)); + list_append_number(list, (varnumber_T) ((n >> 31) & 0x7FFFFFFF)); + list_append_number(list, (varnumber_T) (n & 0x7FFFFFFF)); + } + break; + } + case MSGPACK_OBJECT_NEGATIVE_INTEGER: { + if (mobj.via.i64 >= VARNUMBER_MIN) { + *rettv = (typval_T) { + .v_type = VAR_NUMBER, + .v_lock = 0, + .vval = { .v_number = (varnumber_T) mobj.via.i64 }, + }; + } else { + list_T *const list = list_alloc(); + list->lv_refcount++; + INIT_SPECIAL_DICT(rettv, kMPInteger, + ((typval_T) { + .v_type = VAR_LIST, + .v_lock = 0, + .vval = { .v_list = list }, + })); + uint64_t n = -((uint64_t) mobj.via.i64); + list_append_number(list, -1); + list_append_number(list, (varnumber_T) ((n >> 62) & 0x3)); + list_append_number(list, (varnumber_T) ((n >> 31) & 0x7FFFFFFF)); + list_append_number(list, (varnumber_T) (n & 0x7FFFFFFF)); + } + break; + } + case MSGPACK_OBJECT_FLOAT: { + *rettv = (typval_T) { + .v_type = VAR_FLOAT, + .v_lock = 0, + .vval = { .v_float = mobj.via.f64 }, + }; + break; + } + case MSGPACK_OBJECT_STR: { + list_T *const list = list_alloc(); + list->lv_refcount++; + INIT_SPECIAL_DICT(rettv, kMPString, + ((typval_T) { + .v_type = VAR_LIST, + .v_lock = 0, + .vval = { .v_list = list }, + })); + if (encode_list_write((void *) list, mobj.via.str.ptr, mobj.via.str.size) + == -1) { + return FAIL; + } + break; + } + case MSGPACK_OBJECT_BIN: { + if (memchr(mobj.via.bin.ptr, NUL, mobj.via.bin.size) == NULL) { + *rettv = (typval_T) { + .v_type = VAR_STRING, + .v_lock = 0, + .vval = { .v_string = xmemdupz(mobj.via.bin.ptr, mobj.via.bin.size) }, + }; + 
break; + } + list_T *const list = list_alloc(); + list->lv_refcount++; + INIT_SPECIAL_DICT(rettv, kMPBinary, + ((typval_T) { + .v_type = VAR_LIST, + .v_lock = 0, + .vval = { .v_list = list }, + })); + if (encode_list_write((void *) list, mobj.via.bin.ptr, mobj.via.bin.size) + == -1) { + return FAIL; + } + break; + } + case MSGPACK_OBJECT_ARRAY: { + list_T *const list = list_alloc(); + list->lv_refcount++; + *rettv = (typval_T) { + .v_type = VAR_LIST, + .v_lock = 0, + .vval = { .v_list = list }, + }; + for (size_t i = 0; i < mobj.via.array.size; i++) { + listitem_T *const li = listitem_alloc(); + li->li_tv.v_type = VAR_UNKNOWN; + list_append(list, li); + if (msgpack_to_vim(mobj.via.array.ptr[i], &li->li_tv) == FAIL) { + return FAIL; + } + } + break; + } + case MSGPACK_OBJECT_MAP: { + for (size_t i = 0; i < mobj.via.map.size; i++) { + if (mobj.via.map.ptr[i].key.type != MSGPACK_OBJECT_STR + || mobj.via.map.ptr[i].key.via.str.size == 0 + || memchr(mobj.via.map.ptr[i].key.via.str.ptr, NUL, + mobj.via.map.ptr[i].key.via.str.size) != NULL) { + goto msgpack_to_vim_generic_map; + } + } + dict_T *const dict = dict_alloc(); + dict->dv_refcount++; + *rettv = (typval_T) { + .v_type = VAR_DICT, + .v_lock = 0, + .vval = { .v_dict = dict }, + }; + for (size_t i = 0; i < mobj.via.map.size; i++) { + dictitem_T *const di = xmallocz(offsetof(dictitem_T, di_key) + + mobj.via.map.ptr[i].key.via.str.size); + memcpy(&di->di_key[0], mobj.via.map.ptr[i].key.via.str.ptr, + mobj.via.map.ptr[i].key.via.str.size); + di->di_tv.v_type = VAR_UNKNOWN; + if (dict_add(dict, di) == FAIL) { + // Duplicate key: fallback to generic map + clear_tv(rettv); + xfree(di); + goto msgpack_to_vim_generic_map; + } + if (msgpack_to_vim(mobj.via.map.ptr[i].val, &di->di_tv) == FAIL) { + return FAIL; + } + } + break; +msgpack_to_vim_generic_map: {} + list_T *const list = list_alloc(); + list->lv_refcount++; + INIT_SPECIAL_DICT(rettv, kMPMap, + ((typval_T) { + .v_type = VAR_LIST, + .v_lock = 0, + .vval = { .v_list = 
list }, + })); + for (size_t i = 0; i < mobj.via.map.size; i++) { + list_T *const kv_pair = list_alloc(); + list_append_list(list, kv_pair); + listitem_T *const key_li = listitem_alloc(); + key_li->li_tv.v_type = VAR_UNKNOWN; + list_append(kv_pair, key_li); + listitem_T *const val_li = listitem_alloc(); + val_li->li_tv.v_type = VAR_UNKNOWN; + list_append(kv_pair, val_li); + if (msgpack_to_vim(mobj.via.map.ptr[i].key, &key_li->li_tv) == FAIL) { + return FAIL; + } + if (msgpack_to_vim(mobj.via.map.ptr[i].val, &val_li->li_tv) == FAIL) { + return FAIL; + } + } + break; + } + case MSGPACK_OBJECT_EXT: { + list_T *const list = list_alloc(); + list->lv_refcount++; + list_append_number(list, mobj.via.ext.type); + list_T *const ext_val_list = list_alloc(); + list_append_list(list, ext_val_list); + INIT_SPECIAL_DICT(rettv, kMPExt, + ((typval_T) { + .v_type = VAR_LIST, + .v_lock = 0, + .vval = { .v_list = list }, + })); + if (encode_list_write((void *) ext_val_list, mobj.via.ext.ptr, + mobj.via.ext.size) == -1) { + return FAIL; + } + break; + } + } +#undef INIT_SPECIAL_DICT + return OK; +} diff --git a/src/nvim/eval/decode.h b/src/nvim/eval/decode.h new file mode 100644 index 0000000000..5c25a64f7a --- /dev/null +++ b/src/nvim/eval/decode.h @@ -0,0 +1,13 @@ +#ifndef NVIM_EVAL_DECODE_H +#define NVIM_EVAL_DECODE_H + +#include + +#include + +#include "nvim/eval_defs.h" + +#ifdef INCLUDE_GENERATED_DECLARATIONS +# include "eval/decode.h.generated.h" +#endif +#endif // NVIM_EVAL_DECODE_H diff --git a/src/nvim/eval/encode.c b/src/nvim/eval/encode.c index e23e68dc62..359c9b3de7 100644 --- a/src/nvim/eval/encode.c +++ b/src/nvim/eval/encode.c @@ -65,7 +65,7 @@ const char *const encode_special_var_names[] = { #endif /// Msgpack callback for writing to readfile()-style list -int msgpack_list_write(void *data, const char *buf, size_t len) +int encode_list_write(void *data, const char *buf, size_t len) { if (len == 0) { return 0; -- cgit From ed6756563ca652581f2be14b9f90e55a3c83461b Mon 
Sep 17 00:00:00 2001 From: ZyX Date: Wed, 3 Feb 2016 19:56:37 +0300 Subject: eval/decode: Replace INIT_SPECIAL_DICT macros with inline function --- src/nvim/eval/decode.c | 136 +++++++++++++++++++++++++------------------------ 1 file changed, 70 insertions(+), 66 deletions(-) (limited to 'src/nvim/eval') diff --git a/src/nvim/eval/decode.c b/src/nvim/eval/decode.c index e2b2574d83..2d295eabab 100644 --- a/src/nvim/eval/decode.c +++ b/src/nvim/eval/decode.c @@ -25,6 +25,34 @@ typedef kvec_t(ContainerStackItem) ContainerStack; # include "eval/decode.c.generated.h" #endif +/// Create special dictionary +/// +/// @param[out] rettv Location where created dictionary will be saved. +/// @param[in] type Type of the dictionary. +/// @param[in] val Value associated with the _VAL key. +static inline void create_special_dict(typval_T *const rettv, + const MessagePackType type, + typval_T val) + FUNC_ATTR_NONNULL_ALL +{ + dict_T *const dict = dict_alloc(); + dictitem_T *const type_di = dictitem_alloc((char_u *) "_TYPE"); + type_di->di_tv.v_type = VAR_LIST; + type_di->di_tv.v_lock = 0; + type_di->di_tv.vval.v_list = (list_T *) eval_msgpack_type_lists[type]; + type_di->di_tv.vval.v_list->lv_refcount++; + dict_add(dict, type_di); + dictitem_T *const val_di = dictitem_alloc((char_u *) "_VAL"); + val_di->di_tv = val; + dict_add(dict, val_di); + dict->dv_refcount++; + *rettv = (typval_T) { + .v_type = VAR_DICT, + .v_lock = VAR_UNLOCKED, + .vval = { .v_dict = dict }, + }; +} + /// Helper function used for working with stack vectors used by JSON decoder /// /// @param[in] obj New object. 
@@ -521,40 +549,23 @@ json_decode_string_ret: int msgpack_to_vim(const msgpack_object mobj, typval_T *const rettv) FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT { -#define INIT_SPECIAL_DICT(tv, type, val) \ - do { \ - dict_T *const dict = dict_alloc(); \ - dictitem_T *const type_di = dictitem_alloc((char_u *) "_TYPE"); \ - type_di->di_tv.v_type = VAR_LIST; \ - type_di->di_tv.v_lock = 0; \ - type_di->di_tv.vval.v_list = (list_T *) eval_msgpack_type_lists[type]; \ - type_di->di_tv.vval.v_list->lv_refcount++; \ - dict_add(dict, type_di); \ - dictitem_T *const val_di = dictitem_alloc((char_u *) "_VAL"); \ - val_di->di_tv = val; \ - dict_add(dict, val_di); \ - tv->v_type = VAR_DICT; \ - dict->dv_refcount++; \ - tv->vval.v_dict = dict; \ - } while (0) switch (mobj.type) { case MSGPACK_OBJECT_NIL: { - INIT_SPECIAL_DICT(rettv, kMPNil, ((typval_T) { - .v_type = VAR_NUMBER, - .v_lock = 0, - .vval = { .v_number = 0 }, - })); + create_special_dict(rettv, kMPNil, ((typval_T) { + .v_type = VAR_NUMBER, + .v_lock = 0, + .vval = { .v_number = 0 }, + })); break; } case MSGPACK_OBJECT_BOOLEAN: { - INIT_SPECIAL_DICT(rettv, kMPBoolean, - ((typval_T) { - .v_type = VAR_NUMBER, - .v_lock = 0, - .vval = { - .v_number = (varnumber_T) mobj.via.boolean, - }, - })); + create_special_dict(rettv, kMPBoolean, ((typval_T) { + .v_type = VAR_NUMBER, + .v_lock = 0, + .vval = { + .v_number = (varnumber_T) mobj.via.boolean, + }, + })); break; } case MSGPACK_OBJECT_POSITIVE_INTEGER: { @@ -567,12 +578,11 @@ int msgpack_to_vim(const msgpack_object mobj, typval_T *const rettv) } else { list_T *const list = list_alloc(); list->lv_refcount++; - INIT_SPECIAL_DICT(rettv, kMPInteger, - ((typval_T) { - .v_type = VAR_LIST, - .v_lock = 0, - .vval = { .v_list = list }, - })); + create_special_dict(rettv, kMPInteger, ((typval_T) { + .v_type = VAR_LIST, + .v_lock = 0, + .vval = { .v_list = list }, + })); uint64_t n = mobj.via.u64; list_append_number(list, 1); list_append_number(list, (varnumber_T) ((n >> 62) & 
0x3)); @@ -591,12 +601,11 @@ int msgpack_to_vim(const msgpack_object mobj, typval_T *const rettv) } else { list_T *const list = list_alloc(); list->lv_refcount++; - INIT_SPECIAL_DICT(rettv, kMPInteger, - ((typval_T) { - .v_type = VAR_LIST, - .v_lock = 0, - .vval = { .v_list = list }, - })); + create_special_dict(rettv, kMPInteger, ((typval_T) { + .v_type = VAR_LIST, + .v_lock = 0, + .vval = { .v_list = list }, + })); uint64_t n = -((uint64_t) mobj.via.i64); list_append_number(list, -1); list_append_number(list, (varnumber_T) ((n >> 62) & 0x3)); @@ -616,12 +625,11 @@ int msgpack_to_vim(const msgpack_object mobj, typval_T *const rettv) case MSGPACK_OBJECT_STR: { list_T *const list = list_alloc(); list->lv_refcount++; - INIT_SPECIAL_DICT(rettv, kMPString, - ((typval_T) { - .v_type = VAR_LIST, - .v_lock = 0, - .vval = { .v_list = list }, - })); + create_special_dict(rettv, kMPString, ((typval_T) { + .v_type = VAR_LIST, + .v_lock = 0, + .vval = { .v_list = list }, + })); if (encode_list_write((void *) list, mobj.via.str.ptr, mobj.via.str.size) == -1) { return FAIL; @@ -639,12 +647,11 @@ int msgpack_to_vim(const msgpack_object mobj, typval_T *const rettv) } list_T *const list = list_alloc(); list->lv_refcount++; - INIT_SPECIAL_DICT(rettv, kMPBinary, - ((typval_T) { - .v_type = VAR_LIST, - .v_lock = 0, - .vval = { .v_list = list }, - })); + create_special_dict(rettv, kMPBinary, ((typval_T) { + .v_type = VAR_LIST, + .v_lock = 0, + .vval = { .v_list = list }, + })); if (encode_list_write((void *) list, mobj.via.bin.ptr, mobj.via.bin.size) == -1) { return FAIL; @@ -705,12 +712,11 @@ int msgpack_to_vim(const msgpack_object mobj, typval_T *const rettv) msgpack_to_vim_generic_map: {} list_T *const list = list_alloc(); list->lv_refcount++; - INIT_SPECIAL_DICT(rettv, kMPMap, - ((typval_T) { - .v_type = VAR_LIST, - .v_lock = 0, - .vval = { .v_list = list }, - })); + create_special_dict(rettv, kMPMap, ((typval_T) { + .v_type = VAR_LIST, + .v_lock = 0, + .vval = { .v_list = list }, 
+ })); for (size_t i = 0; i < mobj.via.map.size; i++) { list_T *const kv_pair = list_alloc(); list_append_list(list, kv_pair); @@ -735,12 +741,11 @@ msgpack_to_vim_generic_map: {} list_append_number(list, mobj.via.ext.type); list_T *const ext_val_list = list_alloc(); list_append_list(list, ext_val_list); - INIT_SPECIAL_DICT(rettv, kMPExt, - ((typval_T) { - .v_type = VAR_LIST, - .v_lock = 0, - .vval = { .v_list = list }, - })); + create_special_dict(rettv, kMPExt, ((typval_T) { + .v_type = VAR_LIST, + .v_lock = 0, + .vval = { .v_list = list }, + })); if (encode_list_write((void *) ext_val_list, mobj.via.ext.ptr, mobj.via.ext.size) == -1) { return FAIL; @@ -748,6 +753,5 @@ msgpack_to_vim_generic_map: {} break; } } -#undef INIT_SPECIAL_DICT return OK; } -- cgit From cddd7d47c325ab0c06c21fd101efe4a9a1708fca Mon Sep 17 00:00:00 2001 From: ZyX Date: Wed, 3 Feb 2016 20:04:16 +0300 Subject: eval/decode: Make msgpackparse() function use new v: vars --- src/nvim/eval/decode.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) (limited to 'src/nvim/eval') diff --git a/src/nvim/eval/decode.c b/src/nvim/eval/decode.c index 2d295eabab..23e7752ecc 100644 --- a/src/nvim/eval/decode.c +++ b/src/nvim/eval/decode.c @@ -551,21 +551,11 @@ int msgpack_to_vim(const msgpack_object mobj, typval_T *const rettv) { switch (mobj.type) { case MSGPACK_OBJECT_NIL: { - create_special_dict(rettv, kMPNil, ((typval_T) { - .v_type = VAR_NUMBER, - .v_lock = 0, - .vval = { .v_number = 0 }, - })); + *rettv = get_vim_var_tv(VV_NULL); break; } case MSGPACK_OBJECT_BOOLEAN: { - create_special_dict(rettv, kMPBoolean, ((typval_T) { - .v_type = VAR_NUMBER, - .v_lock = 0, - .vval = { - .v_number = (varnumber_T) mobj.via.boolean, - }, - })); + *rettv = get_vim_var_tv(mobj.via.boolean ? 
VV_TRUE : VV_FALSE); break; } case MSGPACK_OBJECT_POSITIVE_INTEGER: { -- cgit From ea82270d30eef2dd716cd158d989f96fbd503ba6 Mon Sep 17 00:00:00 2001 From: ZyX Date: Wed, 3 Feb 2016 21:01:21 +0300 Subject: eval/decode: Fail on control and invalid unicode characters --- src/nvim/eval/decode.c | 35 ++++++++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 5 deletions(-) (limited to 'src/nvim/eval') diff --git a/src/nvim/eval/decode.c b/src/nvim/eval/decode.c index 23e7752ecc..29841db1b6 100644 --- a/src/nvim/eval/decode.c +++ b/src/nvim/eval/decode.c @@ -264,8 +264,8 @@ int json_decode_string(const char *const buf, const size_t len, } case '"': { size_t len = 0; - const char *s; - for (s = ++p; p < e && *p != '"'; p++) { + const char *const s = ++p; + while (p < e && *p != '"') { if (*p == '\\') { p++; if (p == e) { @@ -285,9 +285,10 @@ int json_decode_string(const char *const buf, const size_t len, p - 1); goto json_decode_string_fail; } - // One UTF-8 character below U+10000 can take up to 3 bytes + // One UTF-8 character below U+10000 can take up to 3 bytes, + // above up to 6, but they are encoded using two \u escapes. len += 3; - p += 4; + p += 5; break; } case '\\': @@ -299,6 +300,7 @@ int json_decode_string(const char *const buf, const size_t len, case 'r': case 'f': { len++; + p++; break; } default: { @@ -307,7 +309,30 @@ int json_decode_string(const char *const buf, const size_t len, } } } else { - len++; + uint8_t p_byte = (uint8_t) *p; + // unescaped = %x20-21 / %x23-5B / %x5D-10FFFF + if (p_byte < 0x20) { + EMSG2(_("E474: ASCII control characters cannot be present " + "inside string: %s"), p); + goto json_decode_string_fail; + } + const int ch = utf_ptr2char((char_u *) p); + // All characters above U+007F are encoded using two or more bytes + // and thus cannot possibly be equal to *p. But utf_ptr2char({0xFF, + // 0}) will return 0xFF, even though 0xFF cannot start any UTF-8 + // code point at all. 
+ if (ch >= 0x80 && p_byte == ch) { + EMSG2(_("E474: Only UTF-8 strings allowed: %s"), p); + goto json_decode_string_fail; + } else if (ch > 0x10FFFF) { + EMSG2(_("E474: Only UTF-8 code points up to U+10FFFF " + "are allowed to appear unescaped: %s"), p); + goto json_decode_string_fail; + } + const size_t ch_len = (size_t) utf_char2len(ch); + assert(ch_len == (size_t) (ch ? utf_ptr2len((char_u *) p) : 1)); + len += ch_len; + p += ch_len; } } if (*p != '"') { -- cgit From 5814e29cdbe370a417d654dbd18620849aa00a09 Mon Sep 17 00:00:00 2001 From: ZyX Date: Wed, 3 Feb 2016 21:46:01 +0300 Subject: eval/decode: Fix surrogate pairs processing --- src/nvim/eval/decode.c | 27 ++++++++++++++------------- src/nvim/eval/encode.c | 8 ++++---- src/nvim/eval/encode.h | 15 +++++++++++++++ 3 files changed, 33 insertions(+), 17 deletions(-) (limited to 'src/nvim/eval') diff --git a/src/nvim/eval/decode.c b/src/nvim/eval/decode.c index 29841db1b6..05dc1c97c4 100644 --- a/src/nvim/eval/decode.c +++ b/src/nvim/eval/decode.c @@ -340,12 +340,12 @@ int json_decode_string(const char *const buf, const size_t len, goto json_decode_string_fail; } char *str = xmalloc(len + 1); - uint16_t fst_in_pair = 0; + int fst_in_pair = 0; char *str_end = str; for (const char *t = s; t < p; t++) { if (t[0] != '\\' || t[1] != 'u') { if (fst_in_pair != 0) { - str_end += utf_char2bytes((int) fst_in_pair, (char_u *) str_end); + str_end += utf_char2bytes(fst_in_pair, (char_u *) str_end); fst_in_pair = 0; } } @@ -353,20 +353,21 @@ int json_decode_string(const char *const buf, const size_t len, t++; switch (*t) { case 'u': { - char ubuf[] = { t[1], t[2], t[3], t[4], 0 }; + const char ubuf[] = { t[1], t[2], t[3], t[4], 0 }; t += 4; unsigned long ch; vim_str2nr((char_u *) ubuf, NULL, NULL, 0, 0, 2, NULL, &ch); - if (0xD800UL <= ch && ch <= 0xDB7FUL) { - fst_in_pair = (uint16_t) ch; - } else if (0xDC00ULL <= ch && ch <= 0xDB7FUL) { - if (fst_in_pair != 0) { - int full_char = ( - (int) (ch - 0xDC00UL) - + (((int) 
(fst_in_pair - 0xD800)) << 10) - ); - str_end += utf_char2bytes(full_char, (char_u *) str_end); - } + if (SURROGATE_HI_START <= ch && ch <= SURROGATE_HI_END) { + fst_in_pair = (int) ch; + } else if (SURROGATE_LO_START <= ch && ch <= SURROGATE_LO_END + && fst_in_pair != 0) { + const int full_char = ( + (int) (ch - SURROGATE_LO_START) + + ((fst_in_pair - SURROGATE_HI_START) << 10) + + SURROGATE_FIRST_CHAR + ); + str_end += utf_char2bytes(full_char, (char_u *) str_end); + fst_in_pair = 0; } else { str_end += utf_char2bytes((int) ch, (char_u *) str_end); } diff --git a/src/nvim/eval/encode.c b/src/nvim/eval/encode.c index 359c9b3de7..e44512d803 100644 --- a/src/nvim/eval/encode.c +++ b/src/nvim/eval/encode.c @@ -970,7 +970,7 @@ static inline int convert_to_json_string(garray_T *const gap, default: { if (vim_isprintc(ch)) { ga_concat_len(gap, buf + i, shift); - } else if (ch <= 0xFFFF) { + } else if (ch < SURROGATE_FIRST_CHAR) { ga_concat_len(gap, ((const char []) { '\\', 'u', xdigits[(ch >> (4 * 3)) & 0xF], @@ -979,9 +979,9 @@ static inline int convert_to_json_string(garray_T *const gap, xdigits[(ch >> (4 * 0)) & 0xF], }), sizeof("\\u1234") - 1); } else { - uint32_t tmp = (uint32_t) ch - 0x010000; - uint16_t hi = 0xD800 + ((tmp >> 10) & 0x03FF); - uint16_t lo = 0xDC00 + ((tmp >> 0) & 0x03FF); + uint32_t tmp = (uint32_t) ch - SURROGATE_FIRST_CHAR; + uint16_t hi = SURROGATE_HI_START + ((tmp >> 10) & ((1 << 10) - 1)); + uint16_t lo = SURROGATE_LO_END + ((tmp >> 0) & ((1 << 10) - 1)); ga_concat_len(gap, ((const char []) { '\\', 'u', xdigits[(hi >> (4 * 3)) & 0xF], diff --git a/src/nvim/eval/encode.h b/src/nvim/eval/encode.h index 0e60c96155..9bc665253b 100644 --- a/src/nvim/eval/encode.h +++ b/src/nvim/eval/encode.h @@ -54,6 +54,21 @@ static inline ListReaderState encode_init_lrstate(const list_T *const list) /// Array mapping values from SpecialVarValue enum to names extern const char *const encode_special_var_names[]; +/// First codepoint in high surrogates block 
+#define SURROGATE_HI_START 0xD800 + +/// Last codepoint in high surrogates block +#define SURROGATE_HI_END 0xDBFF + +/// First codepoint in low surrogates block +#define SURROGATE_LO_START 0xDC00 + +/// Last codepoint in low surrogates block +#define SURROGATE_LO_END 0xDFFF + +/// First character that needs to be encoded as surrogate pair +#define SURROGATE_FIRST_CHAR 0x10000 + #ifdef INCLUDE_GENERATED_DECLARATIONS # include "eval/encode.h.generated.h" #endif -- cgit From 634e51d12b90b00dd01b768904d7bf5ade0acbb0 Mon Sep 17 00:00:00 2001 From: ZyX Date: Wed, 3 Feb 2016 21:56:01 +0300 Subject: eval/*: Fix some linter errors --- src/nvim/eval/decode.c | 9 +++--- src/nvim/eval/encode.c | 79 ++++++++++++++++++++++++-------------------------- 2 files changed, 42 insertions(+), 46 deletions(-) (limited to 'src/nvim/eval') diff --git a/src/nvim/eval/decode.c b/src/nvim/eval/decode.c index 05dc1c97c4..5a3c5709ad 100644 --- a/src/nvim/eval/decode.c +++ b/src/nvim/eval/decode.c @@ -193,8 +193,8 @@ int json_decode_string(const char *const buf, const size_t len, } else if (didcolon) { EMSG2(_("E474: Comma after colon: %s"), p); goto json_decode_string_fail; - } if (last_container.container.v_type == VAR_DICT - && last_container.stack_index != kv_size(stack) - 1) { + } else if (last_container.container.v_type == VAR_DICT + && last_container.stack_index != kv_size(stack) - 1) { EMSG2(_("E474: Using comma in place of colon: %s"), p); goto json_decode_string_fail; } else if ((last_container.container.v_type == VAR_DICT @@ -364,8 +364,7 @@ int json_decode_string(const char *const buf, const size_t len, const int full_char = ( (int) (ch - SURROGATE_LO_START) + ((fst_in_pair - SURROGATE_HI_START) << 10) - + SURROGATE_FIRST_CHAR - ); + + SURROGATE_FIRST_CHAR); str_end += utf_char2bytes(full_char, (char_u *) str_end); fst_in_pair = 0; } else { @@ -763,7 +762,7 @@ msgpack_to_vim_generic_map: {} .vval = { .v_list = list }, })); if (encode_list_write((void *) ext_val_list, 
mobj.via.ext.ptr, - mobj.via.ext.size) == -1) { + mobj.via.ext.size) == -1) { return FAIL; } break; diff --git a/src/nvim/eval/encode.c b/src/nvim/eval/encode.c index e44512d803..5c843357f2 100644 --- a/src/nvim/eval/encode.c +++ b/src/nvim/eval/encode.c @@ -194,13 +194,13 @@ static int conv_error(const char *const msg, const MPConvStack *const mpstack, /// /// @param[in] list Converted list. /// @param[out] ret_len Resulting buffer length. -/// @param[out] ret_buf Allocated buffer with the result or NULL if ret_len is +/// @param[out] ret_buf Allocated buffer with the result or NULL if ret_len is /// zero. /// /// @return true in case of success, false in case of failure. bool encode_vim_list_to_buf(const list_T *const list, size_t *const ret_len, char **const ret_buf) - FUNC_ATTR_NONNULL_ARG(2,3) FUNC_ATTR_WARN_UNUSED_RESULT + FUNC_ATTR_NONNULL_ARG(2, 3) FUNC_ATTR_WARN_UNUSED_RESULT { size_t len = 0; if (list != NULL) { @@ -336,18 +336,15 @@ static int name##_convert_one_value(firstargtype firstargname, \ } \ CHECK_SELF_REFERENCE(tv->vval.v_list, lv_copyID, kMPConvList); \ CONV_LIST_START(tv->vval.v_list); \ - kv_push( \ - MPConvStackVal, \ - *mpstack, \ - ((MPConvStackVal) { \ - .type = kMPConvList, \ - .data = { \ - .l = { \ - .list = tv->vval.v_list, \ - .li = tv->vval.v_list->lv_first, \ - }, \ - }, \ - })); \ + kv_push(MPConvStackVal, *mpstack, ((MPConvStackVal) { \ + .type = kMPConvList, \ + .data = { \ + .l = { \ + .list = tv->vval.v_list, \ + .li = tv->vval.v_list->lv_first, \ + }, \ + }, \ + })); \ break; \ } \ case VAR_SPECIAL: { \ @@ -475,14 +472,14 @@ static int name##_convert_one_value(firstargtype firstargname, \ kMPConvList); \ CONV_LIST_START(val_di->di_tv.vval.v_list); \ kv_push(MPConvStackVal, *mpstack, ((MPConvStackVal) { \ - .type = kMPConvList, \ - .data = { \ - .l = { \ - .list = val_di->di_tv.vval.v_list, \ - .li = val_di->di_tv.vval.v_list->lv_first, \ - }, \ - }, \ - })); \ + .type = kMPConvList, \ + .data = { \ + .l = { \ + .list = 
val_di->di_tv.vval.v_list, \ + .li = val_di->di_tv.vval.v_list->lv_first, \ + }, \ + }, \ + })); \ break; \ } \ case kMPMap: { \ @@ -504,14 +501,14 @@ static int name##_convert_one_value(firstargtype firstargname, \ CHECK_SELF_REFERENCE(val_list, lv_copyID, kMPConvPairs); \ CONV_DICT_START(val_list->lv_len); \ kv_push(MPConvStackVal, *mpstack, ((MPConvStackVal) { \ - .type = kMPConvPairs, \ - .data = { \ - .l = { \ - .list = val_list, \ - .li = val_list->lv_first, \ - }, \ - }, \ - })); \ + .type = kMPConvPairs, \ + .data = { \ + .l = { \ + .list = val_list, \ + .li = val_list->lv_first, \ + }, \ + }, \ + })); \ break; \ } \ case kMPExt: { \ @@ -543,15 +540,15 @@ name##_convert_one_value_regular_dict: \ CHECK_SELF_REFERENCE(tv->vval.v_dict, dv_copyID, kMPConvDict); \ CONV_DICT_START(tv->vval.v_dict->dv_hashtab.ht_used); \ kv_push(MPConvStackVal, *mpstack, ((MPConvStackVal) { \ - .type = kMPConvDict, \ - .data = { \ - .d = { \ - .dict = tv->vval.v_dict, \ - .hi = tv->vval.v_dict->dv_hashtab.ht_array, \ - .todo = tv->vval.v_dict->dv_hashtab.ht_used, \ - }, \ - }, \ - })); \ + .type = kMPConvDict, \ + .data = { \ + .d = { \ + .dict = tv->vval.v_dict, \ + .hi = tv->vval.v_dict->dv_hashtab.ht_array, \ + .todo = tv->vval.v_dict->dv_hashtab.ht_used, \ + }, \ + }, \ + })); \ break; \ } \ case VAR_UNKNOWN: { \ @@ -971,7 +968,7 @@ static inline int convert_to_json_string(garray_T *const gap, if (vim_isprintc(ch)) { ga_concat_len(gap, buf + i, shift); } else if (ch < SURROGATE_FIRST_CHAR) { - ga_concat_len(gap, ((const char []) { + ga_concat_len(gap, ((const char[]) { '\\', 'u', xdigits[(ch >> (4 * 3)) & 0xF], xdigits[(ch >> (4 * 2)) & 0xF], @@ -982,7 +979,7 @@ static inline int convert_to_json_string(garray_T *const gap, uint32_t tmp = (uint32_t) ch - SURROGATE_FIRST_CHAR; uint16_t hi = SURROGATE_HI_START + ((tmp >> 10) & ((1 << 10) - 1)); uint16_t lo = SURROGATE_LO_END + ((tmp >> 0) & ((1 << 10) - 1)); - ga_concat_len(gap, ((const char []) { + ga_concat_len(gap, ((const 
char[]) { '\\', 'u', xdigits[(hi >> (4 * 3)) & 0xF], xdigits[(hi >> (4 * 2)) & 0xF], -- cgit From 2c378fdfaf4927b7071b2e673c19c8acb8dcdfd4 Mon Sep 17 00:00:00 2001 From: ZyX Date: Fri, 5 Feb 2016 00:29:47 +0300 Subject: eval/decode: Parse strings with NUL to special dictionaries --- src/nvim/eval/decode.c | 39 ++++++++++++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 9 deletions(-) (limited to 'src/nvim/eval') diff --git a/src/nvim/eval/decode.c b/src/nvim/eval/decode.c index 5a3c5709ad..8c95b34326 100644 --- a/src/nvim/eval/decode.c +++ b/src/nvim/eval/decode.c @@ -342,6 +342,7 @@ int json_decode_string(const char *const buf, const size_t len, char *str = xmalloc(len + 1); int fst_in_pair = 0; char *str_end = str; + bool hasnul = false; for (const char *t = s; t < p; t++) { if (t[0] != '\\' || t[1] != 'u') { if (fst_in_pair != 0) { @@ -357,6 +358,9 @@ int json_decode_string(const char *const buf, const size_t len, t += 4; unsigned long ch; vim_str2nr((char_u *) ubuf, NULL, NULL, 0, 0, 2, NULL, &ch); + if (ch == 0) { + hasnul = true; + } if (SURROGATE_HI_START <= ch && ch <= SURROGATE_HI_END) { fst_in_pair = (int) ch; } else if (SURROGATE_LO_START <= ch && ch <= SURROGATE_LO_END @@ -405,9 +409,9 @@ int json_decode_string(const char *const buf, const size_t len, str_end += utf_char2bytes((int) fst_in_pair, (char_u *) str_end); } if (conv.vc_type != CONV_NONE) { - size_t len = (size_t) (str_end - str); + size_t str_len = (size_t) (str_end - str); char *const new_str = (char *) string_convert(&conv, (char_u *) str, - &len); + &str_len); if (new_str == NULL) { EMSG2(_("E474: Failed to convert string \"%s\" from UTF-8"), str); xfree(str); @@ -415,14 +419,31 @@ int json_decode_string(const char *const buf, const size_t len, } xfree(str); str = new_str; - str_end = new_str + len; + str_end = new_str + str_len; + } + if (hasnul) { + typval_T obj; + list_T *const list = list_alloc(); + list->lv_refcount++; + create_special_dict(&obj, kMPString, ((typval_T) { 
+ .v_type = VAR_LIST, + .v_lock = 0, + .vval = { .v_list = list }, + })); + if (encode_list_write((void *) list, str, (size_t) (str_end - str)) + == -1) { + clear_tv(&obj); + goto json_decode_string_fail; + } + POP(obj); + } else { + *str_end = NUL; + // TODO: return special string in case of NUL bytes + POP(((typval_T) { + .v_type = VAR_STRING, + .vval = { .v_string = (char_u *) str, }, + })); } - *str_end = NUL; - // TODO: return special string in case of NUL bytes - POP(((typval_T) { - .v_type = VAR_STRING, - .vval = { .v_string = (char_u *) str, }, - })); break; } case '-': -- cgit From e303ea8a19bcd385eb7829beb7f2ef691c064b35 Mon Sep 17 00:00:00 2001 From: ZyX Date: Fri, 5 Feb 2016 02:29:10 +0300 Subject: eval/decode: Add support for special maps Special dictionaries representing map are created when encountering duplicate key or when key is empty or contains NUL. Also checks that values are separated by a comma/colon properly. --- src/nvim/eval/decode.c | 241 ++++++++++++++++++++++++++++++++++++------------- src/nvim/eval/encode.c | 1 + 2 files changed, 181 insertions(+), 61 deletions(-) (limited to 'src/nvim/eval') diff --git a/src/nvim/eval/decode.c b/src/nvim/eval/decode.c index 8c95b34326..7fffe1c48b 100644 --- a/src/nvim/eval/decode.c +++ b/src/nvim/eval/decode.c @@ -13,12 +13,28 @@ /// Helper structure for container_struct typedef struct { - size_t stack_index; ///< Index of current container in stack. - typval_T container; ///< Container. Either VAR_LIST, VAR_DICT or VAR_LIST - ///< which is _VAL from special dictionary. + size_t stack_index; ///< Index of current container in stack. + list_T *special_val; ///< _VAL key contents for special maps. + ///< When container is not a special dictionary it is + ///< NULL. + const char *s; ///< Location where container starts. + typval_T container; ///< Container. Either VAR_LIST, VAR_DICT or VAR_LIST + ///< which is _VAL from special dictionary. 
} ContainerStackItem; -typedef kvec_t(typval_T) ValuesStack; +/// Helper structure for values struct +typedef struct { + bool is_special_string; ///< Indicates that current value is a special + ///< dictionary with string. + bool didcomma; ///< True if previous token was comma. + bool didcolon; ///< True if previous token was colon. + typval_T val; ///< Actual value. +} ValuesStackItem; + +/// Vector containing values not yet saved in any container +typedef kvec_t(ValuesStackItem) ValuesStack; + +/// Vector containing containers, each next container is located inside previous typedef kvec_t(ContainerStackItem) ContainerStack; #ifdef INCLUDE_GENERATED_DECLARATIONS @@ -58,59 +74,119 @@ static inline void create_special_dict(typval_T *const rettv, /// @param[in] obj New object. /// @param[out] stack Object stack. /// @param[out] container_stack Container objects stack. -/// @param[in] p Position in string which is currently being parsed. +/// @param[in,out] pp Position in string which is currently being parsed. Used +/// for error reporting and is also set when decoding is +/// restarted due to the necessity of converting regular +/// dictionary to a special map. +/// @param[out] next_map_special Is set to true when dictionary is converted +/// to a special map, otherwise not touched. +/// @param[out] didcomma True if previous token was comma. Is set to recorded +/// value when decoder is restarted, otherwise unused. +/// @param[out] didcolon True if previous token was colon. Is set to recorded +/// value when decoder is restarted, otherwise unused. /// /// @return OK in case of success, FAIL in case of error. 
-static inline int json_decoder_pop(typval_T obj, ValuesStack *const stack, +static inline int json_decoder_pop(ValuesStackItem obj, + ValuesStack *const stack, ContainerStack *const container_stack, - const char *const p) + const char **const pp, + bool *const next_map_special, + bool *const didcomma, + bool *const didcolon) FUNC_ATTR_NONNULL_ALL { if (kv_size(*container_stack) == 0) { - kv_push(typval_T, *stack, obj); + kv_push(ValuesStackItem, *stack, obj); return OK; } ContainerStackItem last_container = kv_last(*container_stack); - if (obj.v_type == last_container.container.v_type + const char *val_location = *pp; + if (obj.val.v_type == last_container.container.v_type // vval.v_list and vval.v_dict should have the same size and offset - && ((void *) obj.vval.v_list + && ((void *) obj.val.vval.v_list == (void *) last_container.container.vval.v_list)) { kv_pop(*container_stack); + val_location = last_container.s; last_container = kv_last(*container_stack); } if (last_container.container.v_type == VAR_LIST) { + if (last_container.container.vval.v_list->lv_len != 0 + && !obj.didcomma) { + EMSG2(_("E474: Expected comma before list item: %s"), val_location); + clear_tv(&obj.val); + return FAIL; + } + assert(last_container.special_val == NULL); listitem_T *obj_li = listitem_alloc(); - obj_li->li_tv = obj; + obj_li->li_tv = obj.val; list_append(last_container.container.vval.v_list, obj_li); } else if (last_container.stack_index == kv_size(*stack) - 2) { - typval_T key = kv_pop(*stack); - if (key.v_type != VAR_STRING) { - assert(false); - } else if (key.vval.v_string == NULL || *key.vval.v_string == NUL) { - // TODO: fall back to special dict in case of empty key - EMSG(_("E474: Empty key")); - clear_tv(&obj); + if (!obj.didcolon) { + EMSG2(_("E474: Expected colon before dictionary value: %s"), + val_location); + clear_tv(&obj.val); return FAIL; } - dictitem_T *obj_di = dictitem_alloc(key.vval.v_string); - clear_tv(&key); - if 
(dict_add(last_container.container.vval.v_dict, obj_di) - == FAIL) { - // TODO: fall back to special dict in case of duplicate keys - EMSG(_("E474: Duplicate key")); - dictitem_free(obj_di); - clear_tv(&obj); - return FAIL; + ValuesStackItem key = kv_pop(*stack); + if (last_container.special_val == NULL) { + // These cases should have already been handled. + assert(!(key.is_special_string + || key.val.vval.v_string == NULL + || *key.val.vval.v_string == NUL)); + dictitem_T *obj_di = dictitem_alloc(key.val.vval.v_string); + clear_tv(&key.val); + if (dict_add(last_container.container.vval.v_dict, obj_di) + == FAIL) { + assert(false); + } + obj_di->di_tv = obj.val; + } else { + list_T *const kv_pair = list_alloc(); + list_append_list(last_container.special_val, kv_pair); + listitem_T *const key_li = listitem_alloc(); + key_li->li_tv = key.val; + list_append(kv_pair, key_li); + listitem_T *const val_li = listitem_alloc(); + val_li->li_tv = obj.val; + list_append(kv_pair, val_li); } - obj_di->di_tv = obj; } else { // Object with key only - if (obj.v_type != VAR_STRING) { - EMSG2(_("E474: Expected string key: %s"), p); - clear_tv(&obj); + if (!obj.is_special_string && obj.val.v_type != VAR_STRING) { + EMSG2(_("E474: Expected string key: %s"), *pp); + clear_tv(&obj.val); return FAIL; + } else if (!obj.didcomma + && (last_container.special_val == NULL + && (last_container.container.vval.v_dict->dv_hashtab.ht_used + != 0))) { + EMSG2(_("E474: Expected comma before dictionary key: %s"), val_location); + clear_tv(&obj.val); + return FAIL; + } + // Handle empty key and key represented as special dictionary + if (last_container.special_val == NULL + && (obj.is_special_string + || obj.val.vval.v_string == NULL + || *obj.val.vval.v_string == NUL + || dict_find(last_container.container.vval.v_dict, + obj.val.vval.v_string, -1))) { + clear_tv(&obj.val); + + // Restart + kv_pop(*container_stack); + ValuesStackItem last_container_val = + kv_A(*stack, last_container.stack_index); + 
while (kv_size(*stack) > last_container.stack_index) { + clear_tv(&(kv_pop(*stack).val)); + } + *pp = last_container.s; + *didcomma = last_container_val.didcomma; + *didcolon = last_container_val.didcolon; + *next_map_special = true; + return OK; } - kv_push(typval_T, *stack, obj); + kv_push(ValuesStackItem, *stack, obj); } return OK; } @@ -126,7 +202,7 @@ int json_decode_string(const char *const buf, const size_t len, typval_T *const rettv) FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT { - vimconv_T conv; + vimconv_T conv = { .vc_type = CONV_NONE }; convert_setup(&conv, (char_u *) "utf-8", p_enc); conv.vc_fail = true; int ret = OK; @@ -138,14 +214,29 @@ int json_decode_string(const char *const buf, const size_t len, const char *const e = buf + len; bool didcomma = false; bool didcolon = false; -#define POP(obj) \ + bool next_map_special = false; +#define OBJ(obj_tv, is_sp_string) \ + ((ValuesStackItem) { \ + .is_special_string = (is_sp_string), \ + .val = (obj_tv), \ + .didcomma = didcomma, \ + .didcolon = didcolon, \ + }) +#define POP(obj_tv, is_sp_string) \ do { \ - if (json_decoder_pop(obj, &stack, &container_stack, p) == FAIL) { \ + if (json_decoder_pop(OBJ(obj_tv, is_sp_string), &stack, &container_stack, \ + &p, &next_map_special, &didcomma, &didcolon) \ + == FAIL) { \ goto json_decode_string_fail; \ } \ + if (next_map_special) { \ + goto json_decode_string_cycle_start; \ + } \ } while (0) const char *p = buf; for (; p < e; p++) { +json_decode_string_cycle_start: + assert(*p == '{' || next_map_special == false); switch (*p) { case '}': case ']': { @@ -176,8 +267,11 @@ int json_decode_string(const char *const buf, const size_t len, kv_pop(container_stack); goto json_decode_string_after_cycle; } else { - typval_T obj = kv_pop(stack); - POP(obj); + if (json_decoder_pop(kv_pop(stack), &stack, &container_stack, &p, + &next_map_special, &didcomma, &didcolon) == FAIL) { + goto json_decode_string_fail; + } + assert(!next_map_special); break; } } @@ -197,11 
+291,12 @@ int json_decode_string(const char *const buf, const size_t len, && last_container.stack_index != kv_size(stack) - 1) { EMSG2(_("E474: Using comma in place of colon: %s"), p); goto json_decode_string_fail; - } else if ((last_container.container.v_type == VAR_DICT - && (last_container.container.vval.v_dict->dv_hashtab.ht_used - == 0)) - || (last_container.container.v_type == VAR_LIST - && last_container.container.vval.v_list->lv_len == 0)) { + } else if (last_container.special_val == NULL + ? (last_container.container.v_type == VAR_DICT + ? (last_container.container.vval.v_dict->dv_hashtab.ht_used + == 0) + : (last_container.container.vval.v_list->lv_len == 0)) + : (last_container.special_val->lv_len == 0)) { EMSG2(_("E474: Leading comma: %s"), p); goto json_decode_string_fail; } @@ -241,7 +336,7 @@ int json_decode_string(const char *const buf, const size_t len, goto json_decode_string_fail; } p += 3; - POP(get_vim_var_tv(VV_NULL)); + POP(get_vim_var_tv(VV_NULL), false); break; } case 't': { @@ -250,7 +345,7 @@ int json_decode_string(const char *const buf, const size_t len, goto json_decode_string_fail; } p += 3; - POP(get_vim_var_tv(VV_TRUE)); + POP(get_vim_var_tv(VV_TRUE), false); break; } case 'f': { @@ -259,7 +354,7 @@ int json_decode_string(const char *const buf, const size_t len, goto json_decode_string_fail; } p += 4; - POP(get_vim_var_tv(VV_FALSE)); + POP(get_vim_var_tv(VV_FALSE), false); break; } case '"': { @@ -339,6 +434,13 @@ int json_decode_string(const char *const buf, const size_t len, EMSG2(_("E474: Expected string end: %s"), buf); goto json_decode_string_fail; } + if (len == 0) { + POP(((typval_T) { + .v_type = VAR_STRING, + .vval = { .v_string = NULL }, + }), false); + break; + } char *str = xmalloc(len + 1); int fst_in_pair = 0; char *str_end = str; @@ -435,14 +537,13 @@ int json_decode_string(const char *const buf, const size_t len, clear_tv(&obj); goto json_decode_string_fail; } - POP(obj); + POP(obj, true); } else { *str_end = NUL; - 
// TODO: return special string in case of NUL bytes POP(((typval_T) { .v_type = VAR_STRING, - .vval = { .v_string = (char_u *) str, }, - })); + .vval = { .v_string = (char_u *) str }, + }), false); } break; } @@ -510,7 +611,7 @@ int json_decode_string(const char *const buf, const size_t len, vim_str2nr((char_u *) s, NULL, NULL, 0, 0, 0, &nr, NULL); tv.vval.v_number = (varnumber_T) nr; } - POP(tv); + POP(tv, false); p--; break; } @@ -524,24 +625,41 @@ int json_decode_string(const char *const buf, const size_t len, }; kv_push(ContainerStackItem, container_stack, ((ContainerStackItem) { .stack_index = kv_size(stack), + .s = p, .container = tv, + .special_val = NULL, })); - kv_push(typval_T, stack, tv); + kv_push(ValuesStackItem, stack, OBJ(tv, false)); break; } case '{': { - dict_T *dict = dict_alloc(); - dict->dv_refcount++; - typval_T tv = { - .v_type = VAR_DICT, - .v_lock = VAR_UNLOCKED, - .vval = { .v_dict = dict }, - }; + typval_T tv; + list_T *val_list = NULL; + if (next_map_special) { + next_map_special = false; + val_list = list_alloc(); + val_list->lv_refcount++; + create_special_dict(&tv, kMPMap, ((typval_T) { + .v_type = VAR_LIST, + .v_lock = VAR_UNLOCKED, + .vval = { .v_list = val_list }, + })); + } else { + dict_T *dict = dict_alloc(); + dict->dv_refcount++; + tv = (typval_T) { + .v_type = VAR_DICT, + .v_lock = VAR_UNLOCKED, + .vval = { .v_dict = dict }, + }; + } kv_push(ContainerStackItem, container_stack, ((ContainerStackItem) { .stack_index = kv_size(stack), + .s = p, .container = tv, + .special_val = val_list, })); - kv_push(typval_T, stack, tv); + kv_push(ValuesStackItem, stack, OBJ(tv, false)); break; } default: { @@ -557,6 +675,7 @@ int json_decode_string(const char *const buf, const size_t len, } } #undef POP +#undef OBJ json_decode_string_after_cycle: for (; p < e; p++) { switch (*p) { @@ -579,12 +698,12 @@ json_decode_string_after_cycle: json_decode_string_fail: ret = FAIL; while (kv_size(stack)) { - clear_tv(&kv_pop(stack)); + 
clear_tv(&(kv_pop(stack).val)); } json_decode_string_ret: if (ret != FAIL) { assert(kv_size(stack) == 1); - *rettv = kv_pop(stack); + *rettv = kv_pop(stack).val; } kv_destroy(stack); kv_destroy(container_stack); diff --git a/src/nvim/eval/encode.c b/src/nvim/eval/encode.c index 5c843357f2..0fc975ed42 100644 --- a/src/nvim/eval/encode.c +++ b/src/nvim/eval/encode.c @@ -897,6 +897,7 @@ static inline int convert_to_json_string(garray_T *const gap, size_t len_ = len; char *tofree = NULL; if (last_p_enc != (const void *) p_enc) { + p_enc_conv.vc_type = CONV_NONE; convert_setup(&p_enc_conv, p_enc, "utf-8"); p_enc_conv.vc_fail = true; last_p_enc = p_enc; -- cgit From a3b87fc19b652065d96cec8f571d3245f1fc2446 Mon Sep 17 00:00:00 2001 From: ZyX Date: Sat, 6 Feb 2016 02:36:07 +0300 Subject: eval: Remove get_vim_var_tv function --- src/nvim/eval/decode.c | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) (limited to 'src/nvim/eval') diff --git a/src/nvim/eval/decode.c b/src/nvim/eval/decode.c index 7fffe1c48b..de89f9c132 100644 --- a/src/nvim/eval/decode.c +++ b/src/nvim/eval/decode.c @@ -336,7 +336,11 @@ json_decode_string_cycle_start: goto json_decode_string_fail; } p += 3; - POP(get_vim_var_tv(VV_NULL), false); + POP(((typval_T) { + .v_type = VAR_SPECIAL, + .v_lock = VAR_UNLOCKED, + .vval = { .v_special = kSpecialVarNull }, + }), false); break; } case 't': { @@ -345,7 +349,11 @@ json_decode_string_cycle_start: goto json_decode_string_fail; } p += 3; - POP(get_vim_var_tv(VV_TRUE), false); + POP(((typval_T) { + .v_type = VAR_SPECIAL, + .v_lock = VAR_UNLOCKED, + .vval = { .v_special = kSpecialVarTrue }, + }), false); break; } case 'f': { @@ -354,7 +362,11 @@ json_decode_string_cycle_start: goto json_decode_string_fail; } p += 4; - POP(get_vim_var_tv(VV_FALSE), false); + POP(((typval_T) { + .v_type = VAR_SPECIAL, + .v_lock = VAR_UNLOCKED, + .vval = { .v_special = kSpecialVarFalse }, + }), false); break; } case '"': { @@ -716,11 +728,21 @@ int 
msgpack_to_vim(const msgpack_object mobj, typval_T *const rettv) { switch (mobj.type) { case MSGPACK_OBJECT_NIL: { - *rettv = get_vim_var_tv(VV_NULL); + *rettv = (typval_T) { + .v_type = VAR_SPECIAL, + .v_lock = VAR_UNLOCKED, + .vval = { .v_special = kSpecialVarNull }, + }; break; } case MSGPACK_OBJECT_BOOLEAN: { - *rettv = get_vim_var_tv(mobj.via.boolean ? VV_TRUE : VV_FALSE); + *rettv = (typval_T) { + .v_type = VAR_SPECIAL, + .v_lock = VAR_UNLOCKED, + .vval = { + .v_special = mobj.via.boolean ? kSpecialVarTrue : kSpecialVarFalse + }, + }; break; } case MSGPACK_OBJECT_POSITIVE_INTEGER: { -- cgit From 6167ce6df2753d5474ad49aea19f5957128ab015 Mon Sep 17 00:00:00 2001 From: ZyX Date: Sat, 6 Feb 2016 02:46:23 +0300 Subject: eval: Remove v:none To get v:none back just revert this commit. This will not make json*() functions compatible with Vim though. --- src/nvim/eval/encode.c | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'src/nvim/eval') diff --git a/src/nvim/eval/encode.c b/src/nvim/eval/encode.c index 0fc975ed42..1550a61f7f 100644 --- a/src/nvim/eval/encode.c +++ b/src/nvim/eval/encode.c @@ -55,7 +55,6 @@ typedef kvec_t(MPConvStackVal) MPConvStack; const char *const encode_special_var_names[] = { [kSpecialVarNull] = "null", - [kSpecialVarNone] = "none", [kSpecialVarTrue] = "true", [kSpecialVarFalse] = "false", }; @@ -358,10 +357,6 @@ static int name##_convert_one_value(firstargtype firstargname, \ CONV_BOOL(tv->vval.v_special == kSpecialVarTrue); \ break; \ } \ - case kSpecialVarNone: { \ - CONV_NONE_VAL(); \ - break; \ - } \ } \ break; \ } \ @@ -726,9 +721,6 @@ encode_vim_to_##name##_error_ret: \ #define CONV_BOOL(num) \ ga_concat(gap, ((num)? 
"v:true": "v:false")) -#define CONV_NONE_VAL() \ - ga_concat(gap, "v:none") - #define CONV_UNSIGNED_NUMBER(num) #define CONV_DICT_START(len) \ @@ -1074,9 +1066,6 @@ static inline bool check_json_key(const typval_T *const tv) } \ } while (0) -#undef CONV_NONE_VAL -#define CONV_NONE_VAL() - DEFINE_VIML_CONV_FUNCTIONS(static, json, garray_T *const, gap) #undef CONV_STRING @@ -1090,7 +1079,6 @@ DEFINE_VIML_CONV_FUNCTIONS(static, json, garray_T *const, gap) #undef CONV_EMPTY_DICT #undef CONV_NIL #undef CONV_BOOL -#undef CONV_NONE_VAL #undef CONV_UNSIGNED_NUMBER #undef CONV_DICT_START #undef CONV_DICT_END @@ -1226,10 +1214,6 @@ char *encode_tv2json(typval_T *tv, size_t *len) #define CONV_NIL() \ msgpack_pack_nil(packer) -#define CONV_NONE_VAL() \ - return conv_error(_("E953: Attempt to convert v:none in %s, %s"), \ - mpstack, objname) - #define CONV_BOOL(num) \ do { \ if ((num)) { \ @@ -1277,7 +1261,6 @@ DEFINE_VIML_CONV_FUNCTIONS(, msgpack, msgpack_packer *const, packer) #undef CONV_EMPTY_DICT #undef CONV_NIL #undef CONV_BOOL -#undef CONV_NONE_VAL #undef CONV_UNSIGNED_NUMBER #undef CONV_DICT_START #undef CONV_DICT_END -- cgit From 33778c36ccc62d83d24ab30181926ba44fa4eecf Mon Sep 17 00:00:00 2001 From: ZyX Date: Sat, 6 Feb 2016 03:14:10 +0300 Subject: *: Fix linter errors --- src/nvim/eval/decode.c | 55 +++++++++++++++++++++++++++----------------------- 1 file changed, 30 insertions(+), 25 deletions(-) (limited to 'src/nvim/eval') diff --git a/src/nvim/eval/decode.c b/src/nvim/eval/decode.c index de89f9c132..29a1b2a82a 100644 --- a/src/nvim/eval/decode.c +++ b/src/nvim/eval/decode.c @@ -69,6 +69,8 @@ static inline void create_special_dict(typval_T *const rettv, }; } +#define DICT_LEN(dict) (dict)->dv_hashtab.ht_used + /// Helper function used for working with stack vectors used by JSON decoder /// /// @param[in] obj New object. 
@@ -158,8 +160,7 @@ static inline int json_decoder_pop(ValuesStackItem obj, return FAIL; } else if (!obj.didcomma && (last_container.special_val == NULL - && (last_container.container.vval.v_dict->dv_hashtab.ht_used - != 0))) { + && (DICT_LEN(last_container.container.vval.v_dict) != 0))) { EMSG2(_("E474: Expected comma before dictionary key: %s"), val_location); clear_tv(&obj.val); return FAIL; @@ -191,6 +192,25 @@ static inline int json_decoder_pop(ValuesStackItem obj, return OK; } +#define OBJ(obj_tv, is_sp_string) \ + ((ValuesStackItem) { \ + .is_special_string = (is_sp_string), \ + .val = (obj_tv), \ + .didcomma = didcomma, \ + .didcolon = didcolon, \ + }) +#define POP(obj_tv, is_sp_string) \ + do { \ + if (json_decoder_pop(OBJ(obj_tv, is_sp_string), &stack, &container_stack, \ + &p, &next_map_special, &didcomma, &didcolon) \ + == FAIL) { \ + goto json_decode_string_fail; \ + } \ + if (next_map_special) { \ + goto json_decode_string_cycle_start; \ + } \ + } while (0) + /// Convert JSON string into VimL object /// /// @param[in] buf String to convert. UTF-8 encoding is assumed. 
@@ -215,24 +235,6 @@ int json_decode_string(const char *const buf, const size_t len, bool didcomma = false; bool didcolon = false; bool next_map_special = false; -#define OBJ(obj_tv, is_sp_string) \ - ((ValuesStackItem) { \ - .is_special_string = (is_sp_string), \ - .val = (obj_tv), \ - .didcomma = didcomma, \ - .didcolon = didcolon, \ - }) -#define POP(obj_tv, is_sp_string) \ - do { \ - if (json_decoder_pop(OBJ(obj_tv, is_sp_string), &stack, &container_stack, \ - &p, &next_map_special, &didcomma, &didcolon) \ - == FAIL) { \ - goto json_decode_string_fail; \ - } \ - if (next_map_special) { \ - goto json_decode_string_cycle_start; \ - } \ - } while (0) const char *p = buf; for (; p < e; p++) { json_decode_string_cycle_start: @@ -268,7 +270,8 @@ json_decode_string_cycle_start: goto json_decode_string_after_cycle; } else { if (json_decoder_pop(kv_pop(stack), &stack, &container_stack, &p, - &next_map_special, &didcomma, &didcolon) == FAIL) { + &next_map_special, &didcomma, &didcolon) + == FAIL) { goto json_decode_string_fail; } assert(!next_map_special); @@ -293,8 +296,7 @@ json_decode_string_cycle_start: goto json_decode_string_fail; } else if (last_container.special_val == NULL ? (last_container.container.v_type == VAR_DICT - ? (last_container.container.vval.v_dict->dv_hashtab.ht_used - == 0) + ? 
(DICT_LEN(last_container.container.vval.v_dict) == 0) : (last_container.container.vval.v_list->lv_len == 0)) : (last_container.special_val->lv_len == 0)) { EMSG2(_("E474: Leading comma: %s"), p); @@ -686,8 +688,6 @@ json_decode_string_cycle_start: break; } } -#undef POP -#undef OBJ json_decode_string_after_cycle: for (; p < e; p++) { switch (*p) { @@ -722,6 +722,11 @@ json_decode_string_ret: return ret; } +#undef POP +#undef OBJ + +#undef DICT_LEN + /// Convert msgpack object to a VimL one int msgpack_to_vim(const msgpack_object mobj, typval_T *const rettv) FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT -- cgit From f4ea114c672dbd62088b3107775060c58209a9b7 Mon Sep 17 00:00:00 2001 From: ZyX Date: Sat, 6 Feb 2016 03:19:55 +0300 Subject: eval/decode: Fix vim_str2nr invocation --- src/nvim/eval/decode.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'src/nvim/eval') diff --git a/src/nvim/eval/decode.c b/src/nvim/eval/decode.c index 29a1b2a82a..8a5684136b 100644 --- a/src/nvim/eval/decode.c +++ b/src/nvim/eval/decode.c @@ -470,10 +470,11 @@ json_decode_string_cycle_start: t++; switch (*t) { case 'u': { - const char ubuf[] = { t[1], t[2], t[3], t[4], 0 }; + const char ubuf[] = { t[1], t[2], t[3], t[4] }; t += 4; unsigned long ch; - vim_str2nr((char_u *) ubuf, NULL, NULL, 0, 0, 2, NULL, &ch); + vim_str2nr((char_u *) ubuf, NULL, NULL, + STR2NR_HEX | STR2NR_FORCE, NULL, &ch, 4); if (ch == 0) { hasnul = true; } @@ -622,7 +623,7 @@ json_decode_string_cycle_start: } else { // Convert integer long nr; - vim_str2nr((char_u *) s, NULL, NULL, 0, 0, 0, &nr, NULL); + vim_str2nr((char_u *) s, NULL, NULL, 0, &nr, NULL, (int) (p - s)); tv.vval.v_number = (varnumber_T) nr; } POP(tv, false); -- cgit From c91c0171dd7c72717866569be96e48bf838cdf0f Mon Sep 17 00:00:00 2001 From: ZyX Date: Sat, 6 Feb 2016 03:31:45 +0300 Subject: *: Fix gcc warnings --- src/nvim/eval/decode.c | 6 +++--- src/nvim/eval/encode.c | 12 ++++++------ 2 files changed, 9 insertions(+), 9 
deletions(-) (limited to 'src/nvim/eval') diff --git a/src/nvim/eval/decode.c b/src/nvim/eval/decode.c index 8a5684136b..a89a9b8920 100644 --- a/src/nvim/eval/decode.c +++ b/src/nvim/eval/decode.c @@ -107,7 +107,7 @@ static inline int json_decoder_pop(ValuesStackItem obj, // vval.v_list and vval.v_dict should have the same size and offset && ((void *) obj.val.vval.v_list == (void *) last_container.container.vval.v_list)) { - kv_pop(*container_stack); + (void) kv_pop(*container_stack); val_location = last_container.s; last_container = kv_last(*container_stack); } @@ -175,7 +175,7 @@ static inline int json_decoder_pop(ValuesStackItem obj, clear_tv(&obj.val); // Restart - kv_pop(*container_stack); + (void) kv_pop(*container_stack); ValuesStackItem last_container_val = kv_A(*stack, last_container.stack_index); while (kv_size(*stack) > last_container.stack_index) { @@ -266,7 +266,7 @@ json_decode_string_cycle_start: } if (kv_size(stack) == 1) { p++; - kv_pop(container_stack); + (void) kv_pop(container_stack); goto json_decode_string_after_cycle; } else { if (json_decoder_pop(kv_pop(stack), &stack, &container_stack, &p, diff --git a/src/nvim/eval/encode.c b/src/nvim/eval/encode.c index 1550a61f7f..48fbc44b0c 100644 --- a/src/nvim/eval/encode.c +++ b/src/nvim/eval/encode.c @@ -755,7 +755,7 @@ encode_vim_to_##name##_error_ret: \ char ebuf[NUMBUFLEN + 7]; \ size_t backref = 0; \ for (; backref < kv_size(*mpstack); backref++) { \ - const MPConvStackVal mpval = kv_a(MPConvStackVal, *mpstack, backref); \ + const MPConvStackVal mpval = kv_A(*mpstack, backref); \ if (mpval.type == conv_type) { \ if (conv_type == kMPConvDict) { \ if ((void *) mpval.data.d.dict == (void *) (val)) { \ @@ -783,7 +783,7 @@ DEFINE_VIML_CONV_FUNCTIONS(static, string, garray_T *const, gap) char ebuf[NUMBUFLEN + 7]; \ size_t backref = 0; \ for (; backref < kv_size(*mpstack); backref++) { \ - const MPConvStackVal mpval = kv_a(MPConvStackVal, *mpstack, backref); \ + const MPConvStackVal mpval = 
kv_A(*mpstack, backref); \ if (mpval.type == conv_type) { \ if (conv_type == kMPConvDict) { \ if ((void *) mpval.data.d.dict == (void *) val) { \ @@ -932,7 +932,7 @@ static inline int convert_to_json_string(garray_T *const gap, } else if (vim_isprintc(ch)) { str_len += shift; } else { - str_len += ((sizeof("\\u1234") - 1) * (1 + (ch > 0xFFFF))); + str_len += ((sizeof("\\u1234") - 1) * (size_t) (1 + (ch > 0xFFFF))); } break; } @@ -969,9 +969,9 @@ static inline int convert_to_json_string(garray_T *const gap, xdigits[(ch >> (4 * 0)) & 0xF], }), sizeof("\\u1234") - 1); } else { - uint32_t tmp = (uint32_t) ch - SURROGATE_FIRST_CHAR; - uint16_t hi = SURROGATE_HI_START + ((tmp >> 10) & ((1 << 10) - 1)); - uint16_t lo = SURROGATE_LO_END + ((tmp >> 0) & ((1 << 10) - 1)); + const int tmp = ch - SURROGATE_FIRST_CHAR; + const int hi = SURROGATE_HI_START + ((tmp >> 10) & ((1 << 10) - 1)); + const int lo = SURROGATE_LO_END + ((tmp >> 0) & ((1 << 10) - 1)); ga_concat_len(gap, ((const char[]) { '\\', 'u', xdigits[(hi >> (4 * 3)) & 0xF], -- cgit From 7124329bd915e3896b7f09083ff394cd7f598cb8 Mon Sep 17 00:00:00 2001 From: ZyX Date: Sat, 6 Feb 2016 22:19:23 +0300 Subject: *: Fix memory leaks found by clang sanitizer --- src/nvim/eval/decode.c | 1 + src/nvim/eval/encode.c | 10 +++++----- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'src/nvim/eval') diff --git a/src/nvim/eval/decode.c b/src/nvim/eval/decode.c index a89a9b8920..c6706eb0dd 100644 --- a/src/nvim/eval/decode.c +++ b/src/nvim/eval/decode.c @@ -552,6 +552,7 @@ json_decode_string_cycle_start: clear_tv(&obj); goto json_decode_string_fail; } + xfree(str); POP(obj, true); } else { *str_end = NUL; diff --git a/src/nvim/eval/encode.c b/src/nvim/eval/encode.c index 48fbc44b0c..2df689990a 100644 --- a/src/nvim/eval/encode.c +++ b/src/nvim/eval/encode.c @@ -613,7 +613,7 @@ scope int encode_vim_to_##name(firstargtype firstargname, typval_T *const tv, \ CONV_DICT_BETWEEN_ITEMS(); \ } \ const list_T *const kv_pair = 
cur_mpsv->data.l.li->li_tv.vval.v_list; \ - CONV_SPECIAL_DICT_KEY_CHECK(kv_pair); \ + CONV_SPECIAL_DICT_KEY_CHECK(name, kv_pair); \ if (name##_convert_one_value(firstargname, &mpstack, \ &kv_pair->lv_first->li_tv, copyID, \ objname) == FAIL) { \ @@ -735,7 +735,7 @@ encode_vim_to_##name##_error_ret: \ #define CONV_DICT_BETWEEN_ITEMS() \ ga_concat(gap, ", ") -#define CONV_SPECIAL_DICT_KEY_CHECK(kv_pair) +#define CONV_SPECIAL_DICT_KEY_CHECK(name, kv_pair) #define CONV_LIST_END(lst) \ ga_append(gap, ']') @@ -1058,11 +1058,11 @@ static inline bool check_json_key(const typval_T *const tv) } #undef CONV_SPECIAL_DICT_KEY_CHECK -#define CONV_SPECIAL_DICT_KEY_CHECK(kv_pair) \ +#define CONV_SPECIAL_DICT_KEY_CHECK(name, kv_pair) \ do { \ if (!check_json_key(&kv_pair->lv_first->li_tv)) { \ EMSG(_("E474: Invalid key in special dictionary")); \ - return FAIL; \ + goto encode_vim_to_##name##_error_ret; \ } \ } while (0) @@ -1235,7 +1235,7 @@ char *encode_tv2json(typval_T *tv, size_t *len) #define CONV_DICT_BETWEEN_ITEMS() -#define CONV_SPECIAL_DICT_KEY_CHECK(kv_pair) +#define CONV_SPECIAL_DICT_KEY_CHECK(name, kv_pair) #define CONV_LIST_END(lst) -- cgit From 569e404622900222d88d856adbc6421734146bea Mon Sep 17 00:00:00 2001 From: ZyX Date: Sat, 6 Feb 2016 23:07:53 +0300 Subject: eval/encode: Fix non-utf-8 &encoding handling, add tests --- src/nvim/eval/encode.c | 45 ++++++++++++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 17 deletions(-) (limited to 'src/nvim/eval') diff --git a/src/nvim/eval/encode.c b/src/nvim/eval/encode.c index 2df689990a..b29a4c6f21 100644 --- a/src/nvim/eval/encode.c +++ b/src/nvim/eval/encode.c @@ -882,11 +882,11 @@ static inline int convert_to_json_string(garray_T *const gap, const size_t len) FUNC_ATTR_NONNULL_ARG(1) FUNC_ATTR_ALWAYS_INLINE { - const char *buf_ = buf; - if (buf_ == NULL) { + const char *utf_buf = buf; + if (utf_buf == NULL) { ga_concat(gap, "\"\""); } else { - size_t len_ = len; + size_t utf_len = len; char *tofree = 
NULL; if (last_p_enc != (const void *) p_enc) { p_enc_conv.vc_type = CONV_NONE; @@ -895,17 +895,28 @@ static inline int convert_to_json_string(garray_T *const gap, last_p_enc = p_enc; } if (p_enc_conv.vc_type != CONV_NONE) { - tofree = string_convert(&p_enc_conv, buf_, &len_); + tofree = string_convert(&p_enc_conv, buf, &utf_len); if (tofree == NULL) { - EMSG2(_("E474: Failed to convert string \"%s\" to UTF-8"), buf_); + EMSG2(_("E474: Failed to convert string \"%s\" to UTF-8"), utf_buf); return FAIL; } - buf_ = tofree; + utf_buf = tofree; } size_t str_len = 0; - for (size_t i = 0; i < len_;) { - const int ch = utf_ptr2char(buf + i); - const size_t shift = (ch == 0? 1: utf_ptr2len(buf + i)); + // Encode character as \u0000 if + // 1. It is an ASCII control character (0x0 .. 0x1F, 0x7F). + // 2. &encoding is not UTF-8 and code point is above 0x7F. + // 3. &encoding is UTF-8 and code point is not printable according to + // utf_printable(). + // This is done to make it possible to :echo values when &encoding is not + // UTF-8. +#define ENCODE_RAW(p_enc_conv, ch) \ + (ch >= 0x20 && (p_enc_conv.vc_type == CONV_NONE \ + ? utf_printable(ch) \ + : ch < 0x7F)) + for (size_t i = 0; i < utf_len;) { + const int ch = utf_ptr2char(utf_buf + i); + const size_t shift = (ch == 0? 
1: utf_ptr2len(utf_buf + i)); assert(shift > 0); i += shift; switch (ch) { @@ -922,14 +933,14 @@ static inline int convert_to_json_string(garray_T *const gap, default: { if (ch > 0x7F && shift == 1) { EMSG2(_("E474: String \"%s\" contains byte that does not start any " - "UTF-8 character"), buf_); + "UTF-8 character"), utf_buf); return FAIL; } else if ((0xD800 <= ch && ch <= 0xDB7F) || (0xDC00 <= ch && ch <= 0xDFFF)) { EMSG2(_("E474: UTF-8 string contains code point which belongs " - "to surrogate pairs"), buf_); + "to surrogate pairs: %s"), utf_buf + i); return FAIL; - } else if (vim_isprintc(ch)) { + } else if (ENCODE_RAW(p_enc_conv, ch)) { str_len += shift; } else { str_len += ((sizeof("\\u1234") - 1) * (size_t) (1 + (ch > 0xFFFF))); @@ -940,12 +951,12 @@ static inline int convert_to_json_string(garray_T *const gap, } ga_append(gap, '"'); ga_grow(gap, (int) str_len); - for (size_t i = 0; i < len_;) { - const int ch = utf_ptr2char(buf + i); + for (size_t i = 0; i < utf_len;) { + const int ch = utf_ptr2char(utf_buf + i); const size_t shift = (ch == 0? 1: utf_char2len(ch)); assert(shift > 0); // Is false on invalid unicode, but this should already be handled. - assert(ch == 0 || shift == utf_ptr2len(buf + i)); + assert(ch == 0 || shift == utf_ptr2len(utf_buf + i)); switch (ch) { case BS: case TAB: @@ -958,8 +969,8 @@ static inline int convert_to_json_string(garray_T *const gap, break; } default: { - if (vim_isprintc(ch)) { - ga_concat_len(gap, buf + i, shift); + if (ENCODE_RAW(p_enc_conv, ch)) { + ga_concat_len(gap, utf_buf + i, shift); } else if (ch < SURROGATE_FIRST_CHAR) { ga_concat_len(gap, ((const char[]) { '\\', 'u', -- cgit From c27395ddc84952b94118de94af4c33f56f6beca5 Mon Sep 17 00:00:00 2001 From: ZyX Date: Sun, 7 Feb 2016 00:20:45 +0300 Subject: eval: Fix QuickBuild failures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Compiler used by one VM in QuickBuild has found a number of false positives. 
Everything is fine on travis. List of failures: From [QuickBuild][1], build [7429][2]: 14:38:19,945 WARN - /home/quickbuild/buildagent/workspace/root/neovim/pull-requests-automated/src/nvim/eval.c: In function ‘assert_bool’: 14:38:19,945 WARN - /home/quickbuild/buildagent/workspace/root/neovim/pull-requests-automated/src/nvim/eval.c:7551:40: error: comparison between signed and unsigned integer expressions [-Werror=sign-compare] 14:38:20,058 WARN - cc1: all warnings being treated as errors . This is not making much sense (7551:40 is `!=` in `{SpecialVarValue} != ({bool}?{SpecialVarValue}:{SpecialVarValue})`), but this error is present. --- Also fail from [build][3] [4930][4]: 15:47:00,853 WARN - /home/quickbuild/buildagent/workspace/root/neovim/pull-requests-automated/src/nvim/eval/encode.c: In function ‘encode_read_from_list’: 15:47:00,853 WARN - /home/quickbuild/buildagent/workspace/root/neovim/pull-requests-automated/src/nvim/eval/encode.c:258:30: error: conversion to ‘char’ from ‘int’ may alter its value [-Werror=conversion] , pointing to `:` in `{char} = ({char} == {const} ? {const} : {char})` where `{const}` is character constant like `'\n'`. I have no idea where exactly it saw conversion, so simply casted everything to (char). 
--- [Build][5] error: 08:32:03,472 WARN - /home/quickbuild/buildagent/workspace/root/neovim/pull-requests-automated/src/nvim/eval.c: In function ‘tv_equal’: 08:32:03,472 WARN - /home/quickbuild/buildagent/workspace/root/neovim/pull-requests-automated/src/nvim/eval.c:5077:1: error: control reaches end of non-void function [-Werror=return-type] --- Build [4949][7]: 11:28:00,578 WARN - /home/quickbuild/buildagent/workspace/root/neovim/pull-requests-automated/src/nvim/eval.c: In function ‘f_type’: 11:28:00,578 WARN - /home/quickbuild/buildagent/workspace/root/neovim/pull-requests-automated/src/nvim/eval.c:16085:24: error: ‘n’ may be used uninitialized in this function [-Werror=uninitialized] 11:28:00,581 WARN - /home/quickbuild/buildagent/workspace/root/neovim/pull-requests-automated/src/nvim/eval.c: In function ‘f_empty’: 11:28:00,581 WARN - /home/quickbuild/buildagent/workspace/root/neovim/pull-requests-automated/src/nvim/eval.c:8505:24: error: ‘n’ may be used uninitialized in this function [-Werror=uninitialized] [1]: http://neovim-qb.szakmeister.net/wicket/page?5-1.ILinkListener-content-buildTab-panel-masterStep-body-children-0-step-body-children-2-body-children-3-step-body-children-0-step-body-children-0-step-head-logLink [2]: http://neovim-qb.szakmeister.net/build/4929 [3]: http://neovim-qb.szakmeister.net/build/4930 [4]: http://neovim-qb.szakmeister.net/wicket/page?1-1.ILinkListener-content-buildTab-panel-masterStep-body-children-0-step-body-children-1-body-children-3-step-body-children-0-step-body-children-0-step-head-logLink [5]: http://neovim-qb.szakmeister.net/build/4948/step_status [7]: http://neovim-qb.szakmeister.net/build/4949 --- src/nvim/eval/encode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/nvim/eval') diff --git a/src/nvim/eval/encode.c b/src/nvim/eval/encode.c index b29a4c6f21..8280889fbe 100644 --- a/src/nvim/eval/encode.c +++ b/src/nvim/eval/encode.c @@ -255,7 +255,7 @@ int encode_read_from_list(ListReaderState *const 
state, char *const buf, while (p < buf_end) { for (size_t i = state->offset; i < state->li_length && p < buf_end; i++) { const char ch = (char) state->li->li_tv.vval.v_string[state->offset++]; - *p++ = (ch == NL ? NUL : ch); + *p++ = (char) ((char) ch == (char) NL ? (char) NUL : (char) ch); } if (p < buf_end) { state->li = state->li->li_next; -- cgit From 77776b09c684bc2a0c42114fce5a8b04409ec91d Mon Sep 17 00:00:00 2001 From: ZyX Date: Tue, 9 Feb 2016 03:20:16 +0300 Subject: eval/encode: Fix writing strings starting with NL to list Error [found][1] by oni-link. [1]: https://github.com/neovim/neovim/pull/4131/files#r52239384 --- src/nvim/eval/encode.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) (limited to 'src/nvim/eval') diff --git a/src/nvim/eval/encode.c b/src/nvim/eval/encode.c index 8280889fbe..6026189235 100644 --- a/src/nvim/eval/encode.c +++ b/src/nvim/eval/encode.c @@ -79,11 +79,9 @@ int encode_list_write(void *data, const char *buf, size_t len) do { const char *line_start = line_end; line_end = xmemscan(line_start, NL, (size_t) (end - line_start)); - if (line_end == line_start) { - list_append_allocated_string(list, NULL); - } else { + char *str = NULL; + if (line_end != line_start) { const size_t line_length = (size_t) (line_end - line_start); - char *str; if (li == NULL) { str = xmemdupz(line_start, line_length); } else { @@ -93,7 +91,7 @@ int encode_list_write(void *data, const char *buf, size_t len) li->li_tv.vval.v_string = xrealloc(li->li_tv.vval.v_string, li_len + line_length + 1); str = (char *) li->li_tv.vval.v_string + li_len; - memmove(str, line_start, line_length); + memcpy(str, line_start, line_length); str[line_length] = 0; } for (size_t i = 0; i < line_length; i++) { @@ -101,14 +99,14 @@ int encode_list_write(void *data, const char *buf, size_t len) str[i] = NL; } } - if (li == NULL) { - list_append_allocated_string(list, str); - } else { - li = NULL; - } - if (line_end == end - 1) { - 
list_append_allocated_string(list, NULL); - } + } + if (li == NULL) { + list_append_allocated_string(list, str); + } else { + li = NULL; + } + if (line_end == end - 1) { + list_append_allocated_string(list, NULL); } line_end++; } while (line_end < end); -- cgit From f0bd4a149408e75ebf887530964e0948518938dc Mon Sep 17 00:00:00 2001 From: ZyX Date: Thu, 11 Feb 2016 01:29:09 +0300 Subject: eval/encode: Fix invalid UTF-8 strings handling: 1. Do not allow reading past buffer end when creating error messages. 2. Fix surrogate pairs range, avoid magic constants. --- src/nvim/eval/encode.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'src/nvim/eval') diff --git a/src/nvim/eval/encode.c b/src/nvim/eval/encode.c index 6026189235..6fa22bfc5c 100644 --- a/src/nvim/eval/encode.c +++ b/src/nvim/eval/encode.c @@ -895,7 +895,8 @@ static inline int convert_to_json_string(garray_T *const gap, if (p_enc_conv.vc_type != CONV_NONE) { tofree = string_convert(&p_enc_conv, buf, &utf_len); if (tofree == NULL) { - EMSG2(_("E474: Failed to convert string \"%s\" to UTF-8"), utf_buf); + emsgf(_("E474: Failed to convert string \"%.*s\" to UTF-8"), + utf_len, utf_buf); return FAIL; } utf_buf = tofree; @@ -930,18 +931,21 @@ static inline int convert_to_json_string(garray_T *const gap, } default: { if (ch > 0x7F && shift == 1) { - EMSG2(_("E474: String \"%s\" contains byte that does not start any " - "UTF-8 character"), utf_buf); + emsgf(_("E474: String \"%.*s\" contains byte that does not start " + "any UTF-8 character"), + utf_len - (i - shift), utf_buf + i - shift); return FAIL; - } else if ((0xD800 <= ch && ch <= 0xDB7F) - || (0xDC00 <= ch && ch <= 0xDFFF)) { - EMSG2(_("E474: UTF-8 string contains code point which belongs " - "to surrogate pairs: %s"), utf_buf + i); + } else if ((SURROGATE_HI_START <= ch && ch <= SURROGATE_HI_END) + || (SURROGATE_LO_START <= ch && ch <= SURROGATE_LO_END)) { + emsgf(_("E474: UTF-8 string contains code point which 
belongs " + "to a surrogate pair: %.*s"), + utf_len - (i - shift), utf_buf + i - shift); return FAIL; } else if (ENCODE_RAW(p_enc_conv, ch)) { str_len += shift; } else { - str_len += ((sizeof("\\u1234") - 1) * (size_t) (1 + (ch > 0xFFFF))); + str_len += ((sizeof("\\u1234") - 1) + * (size_t) (1 + (ch >= SURROGATE_FIRST_CHAR))); } break; } -- cgit From 209427e97224ea7fdd49eb53fa41e0b26c55369f Mon Sep 17 00:00:00 2001 From: ZyX Date: Thu, 11 Feb 2016 01:34:08 +0300 Subject: eval/encode: Reduce length of encode_list_write Changes suggested by oni-link. --- src/nvim/eval/encode.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'src/nvim/eval') diff --git a/src/nvim/eval/encode.c b/src/nvim/eval/encode.c index 6fa22bfc5c..4d6a0afe9c 100644 --- a/src/nvim/eval/encode.c +++ b/src/nvim/eval/encode.c @@ -72,9 +72,6 @@ int encode_list_write(void *data, const char *buf, size_t len) list_T *const list = (list_T *) data; const char *const end = buf + len; const char *line_end = buf; - if (list->lv_last == NULL) { - list_append_string(list, NULL, 0); - } listitem_T *li = list->lv_last; do { const char *line_start = line_end; @@ -94,11 +91,7 @@ int encode_list_write(void *data, const char *buf, size_t len) memcpy(str, line_start, line_length); str[line_length] = 0; } - for (size_t i = 0; i < line_length; i++) { - if (str[i] == NUL) { - str[i] = NL; - } - } + memchrsub(str, NUL, NL, line_length); } if (li == NULL) { list_append_allocated_string(list, str); -- cgit From 4913a25dec5edc8888579a0f09a1b2f5f783c911 Mon Sep 17 00:00:00 2001 From: ZyX Date: Thu, 11 Feb 2016 01:38:58 +0300 Subject: eval/encode: Free memory just in case After string_convert() with .vc_fail=true these blocks should never be entered because they indicate invalid unicode. 
--- src/nvim/eval/encode.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/nvim/eval') diff --git a/src/nvim/eval/encode.c b/src/nvim/eval/encode.c index 4d6a0afe9c..0096d9172b 100644 --- a/src/nvim/eval/encode.c +++ b/src/nvim/eval/encode.c @@ -927,12 +927,14 @@ static inline int convert_to_json_string(garray_T *const gap, emsgf(_("E474: String \"%.*s\" contains byte that does not start " "any UTF-8 character"), utf_len - (i - shift), utf_buf + i - shift); + xfree(tofree); return FAIL; } else if ((SURROGATE_HI_START <= ch && ch <= SURROGATE_HI_END) || (SURROGATE_LO_START <= ch && ch <= SURROGATE_LO_END)) { emsgf(_("E474: UTF-8 string contains code point which belongs " "to a surrogate pair: %.*s"), utf_len - (i - shift), utf_buf + i - shift); + xfree(tofree); return FAIL; } else if (ENCODE_RAW(p_enc_conv, ch)) { str_len += shift; -- cgit From af6603a6b4c9b1cb4a65eb2dc581295d8990c5ef Mon Sep 17 00:00:00 2001 From: ZyX Date: Thu, 11 Feb 2016 01:40:40 +0300 Subject: eval/encode: Remove unneeded variable, add missing include --- src/nvim/eval/encode.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'src/nvim/eval') diff --git a/src/nvim/eval/encode.c b/src/nvim/eval/encode.c index 0096d9172b..e3d0bf69b2 100644 --- a/src/nvim/eval/encode.c +++ b/src/nvim/eval/encode.c @@ -15,6 +15,7 @@ #include "nvim/garray.h" #include "nvim/mbyte.h" #include "nvim/message.h" +#include "nvim/memory.h" #include "nvim/charset.h" // vim_isprintc() #include "nvim/macros.h" #include "nvim/ascii.h" @@ -636,17 +637,10 @@ encode_vim_to_##name##_error_ret: \ ga_concat(gap, "''"); \ } else { \ const size_t len_ = (len); \ - size_t num_quotes = 0; \ - for (size_t i = 0; i < len_; i++) { \ - if (buf_[i] == '\'') { \ - num_quotes++; \ - } \ - } \ - ga_grow(gap, (int) (2 + len_ + num_quotes)); \ + ga_grow(gap, (int) (2 + len_ + memcnt(buf_, '\'', len_))); \ ga_append(gap, '\''); \ for (size_t i = 0; i < len_; i++) { \ if (buf_[i] == '\'') { \ - num_quotes++; \ 
ga_append(gap, '\''); \ } \ ga_append(gap, buf_[i]); \ -- cgit From 2f67786796d5fb4237f4b0258ec3db0982cc7f53 Mon Sep 17 00:00:00 2001 From: ZyX Date: Sat, 13 Feb 2016 21:39:28 +0300 Subject: eval: Rename json* functions to json_* --- src/nvim/eval/encode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/nvim/eval') diff --git a/src/nvim/eval/encode.c b/src/nvim/eval/encode.c index e3d0bf69b2..a131f5c3c1 100644 --- a/src/nvim/eval/encode.c +++ b/src/nvim/eval/encode.c @@ -1020,7 +1020,7 @@ static inline int convert_to_json_string(garray_T *const gap, "attempt to dump function reference"), \ mpstack, objname) -/// Check whether given key can be used in jsonencode() +/// Check whether given key can be used in json_encode() /// /// @param[in] tv Key to check. static inline bool check_json_key(const typval_T *const tv) -- cgit From 406562ac6d3863dfdaedbf40f9d4a23ca37c9ec5 Mon Sep 17 00:00:00 2001 From: ZyX Date: Sun, 21 Feb 2016 21:33:58 +0300 Subject: encode: Fail to dump NaN and infinity Thanks to vim/vim#654 --- src/nvim/eval/encode.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) (limited to 'src/nvim/eval') diff --git a/src/nvim/eval/encode.c b/src/nvim/eval/encode.c index a131f5c3c1..d21347cca6 100644 --- a/src/nvim/eval/encode.c +++ b/src/nvim/eval/encode.c @@ -7,6 +7,7 @@ #include #include #include +#include #include "nvim/eval/encode.h" #include "nvim/buffer_defs.h" // vimconv_T @@ -827,9 +828,23 @@ DEFINE_VIML_CONV_FUNCTIONS(, echo, garray_T *const, gap) #undef CONV_FLOAT #define CONV_FLOAT(flt) \ do { \ - char numbuf[NUMBUFLEN]; \ - vim_snprintf(numbuf, NUMBUFLEN - 1, "%g", (flt)); \ - ga_concat(gap, numbuf); \ + const float_T flt_ = (flt); \ + switch (fpclassify(flt_)) { \ + case FP_NAN: { \ + EMSG(_("E474: Unable to represent NaN value in JSON")); \ + return FAIL; \ + } \ + case FP_INFINITE: { \ + EMSG(_("E474: Unable to represent infinity in JSON")); \ + return FAIL; \ + } \ + default: { \ + char 
numbuf[NUMBUFLEN]; \ + vim_snprintf(numbuf, NUMBUFLEN - 1, "%g", flt_); \ + ga_concat(gap, (char_u *) numbuf); \ + break; \ + } \ + } \ } while (0) /// Last used p_enc value -- cgit From 942e0b338c9bff1dfdcb59e8308160449f1f38b4 Mon Sep 17 00:00:00 2001 From: ZyX Date: Thu, 25 Feb 2016 17:27:23 +0300 Subject: encode: Handle incomplete surrogates like `\uSURR\uOTHR` properly --- src/nvim/eval/decode.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'src/nvim/eval') diff --git a/src/nvim/eval/decode.c b/src/nvim/eval/decode.c index c6706eb0dd..4955a4f5a4 100644 --- a/src/nvim/eval/decode.c +++ b/src/nvim/eval/decode.c @@ -459,12 +459,16 @@ json_decode_string_cycle_start: int fst_in_pair = 0; char *str_end = str; bool hasnul = false; +#define PUT_FST_IN_PAIR(fst_in_pair, str_end) \ + do { \ + if (fst_in_pair != 0) { \ + str_end += utf_char2bytes(fst_in_pair, (char_u *) str_end); \ + fst_in_pair = 0; \ + } \ + } while (0) for (const char *t = s; t < p; t++) { if (t[0] != '\\' || t[1] != 'u') { - if (fst_in_pair != 0) { - str_end += utf_char2bytes(fst_in_pair, (char_u *) str_end); - fst_in_pair = 0; - } + PUT_FST_IN_PAIR(fst_in_pair, str_end); } if (*t == '\\') { t++; @@ -489,6 +493,7 @@ json_decode_string_cycle_start: str_end += utf_char2bytes(full_char, (char_u *) str_end); fst_in_pair = 0; } else { + PUT_FST_IN_PAIR(fst_in_pair, str_end); str_end += utf_char2bytes((int) ch, (char_u *) str_end); } break; @@ -522,9 +527,8 @@ json_decode_string_cycle_start: *str_end++ = *t; } } - if (fst_in_pair != 0) { - str_end += utf_char2bytes((int) fst_in_pair, (char_u *) str_end); - } + PUT_FST_IN_PAIR(fst_in_pair, str_end); +#undef PUT_FST_IN_PAIR if (conv.vc_type != CONV_NONE) { size_t str_len = (size_t) (str_end - str); char *const new_str = (char *) string_convert(&conv, (char_u *) str, -- cgit From 4ff5d6e41c3217bb3bb081743ac8b33667322137 Mon Sep 17 00:00:00 2001 From: ZyX Date: Mon, 7 Mar 2016 06:36:02 +0300 Subject: eval/decode: Also use 
VAR_UNLOCKED in old code --- src/nvim/eval/decode.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'src/nvim/eval') diff --git a/src/nvim/eval/decode.c b/src/nvim/eval/decode.c index 4955a4f5a4..65c4359c49 100644 --- a/src/nvim/eval/decode.c +++ b/src/nvim/eval/decode.c @@ -54,7 +54,7 @@ static inline void create_special_dict(typval_T *const rettv, dict_T *const dict = dict_alloc(); dictitem_T *const type_di = dictitem_alloc((char_u *) "_TYPE"); type_di->di_tv.v_type = VAR_LIST; - type_di->di_tv.v_lock = 0; + type_di->di_tv.v_lock = VAR_UNLOCKED; type_di->di_tv.vval.v_list = (list_T *) eval_msgpack_type_lists[type]; type_di->di_tv.vval.v_list->lv_refcount++; dict_add(dict, type_di); @@ -548,7 +548,7 @@ json_decode_string_cycle_start: list->lv_refcount++; create_special_dict(&obj, kMPString, ((typval_T) { .v_type = VAR_LIST, - .v_lock = 0, + .v_lock = VAR_UNLOCKED, .vval = { .v_list = list }, })); if (encode_list_write((void *) list, str, (size_t) (str_end - str)) @@ -760,7 +760,7 @@ int msgpack_to_vim(const msgpack_object mobj, typval_T *const rettv) if (mobj.via.u64 <= VARNUMBER_MAX) { *rettv = (typval_T) { .v_type = VAR_NUMBER, - .v_lock = 0, + .v_lock = VAR_UNLOCKED, .vval = { .v_number = (varnumber_T) mobj.via.u64 }, }; } else { @@ -768,7 +768,7 @@ int msgpack_to_vim(const msgpack_object mobj, typval_T *const rettv) list->lv_refcount++; create_special_dict(rettv, kMPInteger, ((typval_T) { .v_type = VAR_LIST, - .v_lock = 0, + .v_lock = VAR_UNLOCKED, .vval = { .v_list = list }, })); uint64_t n = mobj.via.u64; @@ -783,7 +783,7 @@ int msgpack_to_vim(const msgpack_object mobj, typval_T *const rettv) if (mobj.via.i64 >= VARNUMBER_MIN) { *rettv = (typval_T) { .v_type = VAR_NUMBER, - .v_lock = 0, + .v_lock = VAR_UNLOCKED, .vval = { .v_number = (varnumber_T) mobj.via.i64 }, }; } else { @@ -791,7 +791,7 @@ int msgpack_to_vim(const msgpack_object mobj, typval_T *const rettv) list->lv_refcount++; create_special_dict(rettv, 
kMPInteger, ((typval_T) { .v_type = VAR_LIST, - .v_lock = 0, + .v_lock = VAR_UNLOCKED, .vval = { .v_list = list }, })); uint64_t n = -((uint64_t) mobj.via.i64); @@ -805,7 +805,7 @@ int msgpack_to_vim(const msgpack_object mobj, typval_T *const rettv) case MSGPACK_OBJECT_FLOAT: { *rettv = (typval_T) { .v_type = VAR_FLOAT, - .v_lock = 0, + .v_lock = VAR_UNLOCKED, .vval = { .v_float = mobj.via.f64 }, }; break; @@ -815,7 +815,7 @@ int msgpack_to_vim(const msgpack_object mobj, typval_T *const rettv) list->lv_refcount++; create_special_dict(rettv, kMPString, ((typval_T) { .v_type = VAR_LIST, - .v_lock = 0, + .v_lock = VAR_UNLOCKED, .vval = { .v_list = list }, })); if (encode_list_write((void *) list, mobj.via.str.ptr, mobj.via.str.size) @@ -828,7 +828,7 @@ int msgpack_to_vim(const msgpack_object mobj, typval_T *const rettv) if (memchr(mobj.via.bin.ptr, NUL, mobj.via.bin.size) == NULL) { *rettv = (typval_T) { .v_type = VAR_STRING, - .v_lock = 0, + .v_lock = VAR_UNLOCKED, .vval = { .v_string = xmemdupz(mobj.via.bin.ptr, mobj.via.bin.size) }, }; break; @@ -837,7 +837,7 @@ int msgpack_to_vim(const msgpack_object mobj, typval_T *const rettv) list->lv_refcount++; create_special_dict(rettv, kMPBinary, ((typval_T) { .v_type = VAR_LIST, - .v_lock = 0, + .v_lock = VAR_UNLOCKED, .vval = { .v_list = list }, })); if (encode_list_write((void *) list, mobj.via.bin.ptr, mobj.via.bin.size) @@ -851,7 +851,7 @@ int msgpack_to_vim(const msgpack_object mobj, typval_T *const rettv) list->lv_refcount++; *rettv = (typval_T) { .v_type = VAR_LIST, - .v_lock = 0, + .v_lock = VAR_UNLOCKED, .vval = { .v_list = list }, }; for (size_t i = 0; i < mobj.via.array.size; i++) { @@ -877,7 +877,7 @@ int msgpack_to_vim(const msgpack_object mobj, typval_T *const rettv) dict->dv_refcount++; *rettv = (typval_T) { .v_type = VAR_DICT, - .v_lock = 0, + .v_lock = VAR_UNLOCKED, .vval = { .v_dict = dict }, }; for (size_t i = 0; i < mobj.via.map.size; i++) { @@ -902,7 +902,7 @@ msgpack_to_vim_generic_map: {} 
list->lv_refcount++; create_special_dict(rettv, kMPMap, ((typval_T) { .v_type = VAR_LIST, - .v_lock = 0, + .v_lock = VAR_UNLOCKED, .vval = { .v_list = list }, })); for (size_t i = 0; i < mobj.via.map.size; i++) { @@ -931,7 +931,7 @@ msgpack_to_vim_generic_map: {} list_append_list(list, ext_val_list); create_special_dict(rettv, kMPExt, ((typval_T) { .v_type = VAR_LIST, - .v_lock = 0, + .v_lock = VAR_UNLOCKED, .vval = { .v_list = list }, })); if (encode_list_write((void *) ext_val_list, mobj.via.ext.ptr, -- cgit From 1fc84ae2cd07f10f769ef966dd92b18ca8552748 Mon Sep 17 00:00:00 2001 From: ZyX Date: Mon, 7 Mar 2016 06:39:12 +0300 Subject: eval/decode: Record that `obj` may be freed --- src/nvim/eval/decode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/nvim/eval') diff --git a/src/nvim/eval/decode.c b/src/nvim/eval/decode.c index 65c4359c49..d6426ee643 100644 --- a/src/nvim/eval/decode.c +++ b/src/nvim/eval/decode.c @@ -73,7 +73,8 @@ static inline void create_special_dict(typval_T *const rettv, /// Helper function used for working with stack vectors used by JSON decoder /// -/// @param[in] obj New object. +/// @param[in,out] obj New object. Will either be put into the stack (and, +/// probably, also inside container) or freed. /// @param[out] stack Object stack. /// @param[out] container_stack Container objects stack. /// @param[in,out] pp Position in string which is currently being parsed. Used -- cgit From 4a29995fe74ed95c641ef40c68d8a4223e90cccf Mon Sep 17 00:00:00 2001 From: ZyX Date: Mon, 7 Mar 2016 06:41:00 +0300 Subject: eval/decode: Rename brackets in error messages U+007D is officially RIGHT CURLY BRACKET. U+005D is officially RIGHT SQUARE BRACKET. 
--- src/nvim/eval/decode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/nvim/eval') diff --git a/src/nvim/eval/decode.c b/src/nvim/eval/decode.c index d6426ee643..fc6e912c20 100644 --- a/src/nvim/eval/decode.c +++ b/src/nvim/eval/decode.c @@ -249,10 +249,10 @@ json_decode_string_cycle_start: } ContainerStackItem last_container = kv_last(container_stack); if (*p == '}' && last_container.container.v_type != VAR_DICT) { - EMSG2(_("E474: Closing list with figure brace: %s"), p); + EMSG2(_("E474: Closing list with curly bracket: %s"), p); goto json_decode_string_fail; } else if (*p == ']' && last_container.container.v_type != VAR_LIST) { - EMSG2(_("E474: Closing dictionary with bracket: %s"), p); + EMSG2(_("E474: Closing dictionary with square bracket: %s"), p); goto json_decode_string_fail; } else if (didcomma) { EMSG2(_("E474: Trailing comma: %s"), p); -- cgit From 4eb5d05f018bc568580c85f17ddb304fcec364ca Mon Sep 17 00:00:00 2001 From: ZyX Date: Mon, 7 Mar 2016 07:10:38 +0300 Subject: eval/decode: Avoid overflow when parsing incomplete null/true/false Note: second test does not crash or produce asan errors, even though it should. 
--- src/nvim/eval/decode.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/nvim/eval') diff --git a/src/nvim/eval/decode.c b/src/nvim/eval/decode.c index fc6e912c20..35e8421716 100644 --- a/src/nvim/eval/decode.c +++ b/src/nvim/eval/decode.c @@ -334,7 +334,7 @@ json_decode_string_cycle_start: continue; } case 'n': { - if (strncmp(p + 1, "ull", 3) != 0) { + if ((p + 3) >= e || strncmp(p + 1, "ull", 3) != 0) { EMSG2(_("E474: Expected null: %s"), p); goto json_decode_string_fail; } @@ -347,7 +347,7 @@ json_decode_string_cycle_start: break; } case 't': { - if (strncmp(p + 1, "rue", 3) != 0) { + if ((p + 3) >= e || strncmp(p + 1, "rue", 3) != 0) { EMSG2(_("E474: Expected true: %s"), p); goto json_decode_string_fail; } @@ -360,7 +360,7 @@ json_decode_string_cycle_start: break; } case 'f': { - if (strncmp(p + 1, "alse", 4) != 0) { + if ((p + 4) >= e || strncmp(p + 1, "alse", 4) != 0) { EMSG2(_("E474: Expected false: %s"), p); goto json_decode_string_fail; } -- cgit From 394830631f130ad646f23358bf7863e7a37c6d78 Mon Sep 17 00:00:00 2001 From: ZyX Date: Mon, 7 Mar 2016 07:27:14 +0300 Subject: eval/decode: Make sure that U+00C3 is parsed correctly --- src/nvim/eval/decode.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src/nvim/eval') diff --git a/src/nvim/eval/decode.c b/src/nvim/eval/decode.c index 35e8421716..ce2723147d 100644 --- a/src/nvim/eval/decode.c +++ b/src/nvim/eval/decode.c @@ -431,7 +431,10 @@ json_decode_string_cycle_start: // and thus cannot possibly be equal to *p. But utf_ptr2char({0xFF, // 0}) will return 0xFF, even though 0xFF cannot start any UTF-8 // code point at all. - if (ch >= 0x80 && p_byte == ch) { + // + // The only exception is U+00C3 which is represented as 0xC3 0x83. 
+ if (ch >= 0x80 && p_byte == ch && !( + ch == 0xC3 && p + 1 < e && (uint8_t) p[1] == 0x83)) { EMSG2(_("E474: Only UTF-8 strings allowed: %s"), p); goto json_decode_string_fail; } else if (ch > 0x10FFFF) { -- cgit From 224d7df6309319cfa1f98aad3aa93c5b63ee4145 Mon Sep 17 00:00:00 2001 From: ZyX Date: Mon, 7 Mar 2016 07:37:21 +0300 Subject: eval/decode: Make sure that blank input does not crash Neovim --- src/nvim/eval/decode.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'src/nvim/eval') diff --git a/src/nvim/eval/decode.c b/src/nvim/eval/decode.c index ce2723147d..4ce47a5e19 100644 --- a/src/nvim/eval/decode.c +++ b/src/nvim/eval/decode.c @@ -223,6 +223,15 @@ int json_decode_string(const char *const buf, const size_t len, typval_T *const rettv) FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT { + const char *p = buf; + const char *const e = buf + len; + while (p < e && (*p == ' ' || *p == '\t' || *p == '\n')) { + p++; + } + if (p == e) { + EMSG(_("E474: Attempt to decode a blank string")); + return FAIL; + } vimconv_T conv = { .vc_type = CONV_NONE }; convert_setup(&conv, (char_u *) "utf-8", p_enc); conv.vc_fail = true; @@ -232,11 +241,9 @@ int json_decode_string(const char *const buf, const size_t len, ContainerStack container_stack; kv_init(container_stack); rettv->v_type = VAR_UNKNOWN; - const char *const e = buf + len; bool didcomma = false; bool didcolon = false; bool next_map_special = false; - const char *p = buf; for (; p < e; p++) { json_decode_string_cycle_start: assert(*p == '{' || next_map_special == false); -- cgit From 52c6cc21899d0d5bf0dffc2cee849063e176e931 Mon Sep 17 00:00:00 2001 From: ZyX Date: Mon, 7 Mar 2016 07:50:17 +0300 Subject: eval/decode: Make sure that parsing strings does not overflow --- src/nvim/eval/decode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/nvim/eval') diff --git a/src/nvim/eval/decode.c b/src/nvim/eval/decode.c index 4ce47a5e19..8bd7f5d940 100644 --- 
a/src/nvim/eval/decode.c +++ b/src/nvim/eval/decode.c @@ -455,7 +455,7 @@ json_decode_string_cycle_start: p += ch_len; } } - if (*p != '"') { + if (p == e || *p != '"') { EMSG2(_("E474: Expected string end: %s"), buf); goto json_decode_string_fail; } -- cgit From eb806c96205ff776d9cd5df82da72c14e030f6d6 Mon Sep 17 00:00:00 2001 From: ZyX Date: Mon, 7 Mar 2016 08:54:39 +0300 Subject: eval/decode: Make sure that error messages do not cause overflow --- src/nvim/eval/decode.c | 86 +++++++++++++++++++++++++++----------------------- 1 file changed, 46 insertions(+), 40 deletions(-) (limited to 'src/nvim/eval') diff --git a/src/nvim/eval/decode.c b/src/nvim/eval/decode.c index 8bd7f5d940..266da86b74 100644 --- a/src/nvim/eval/decode.c +++ b/src/nvim/eval/decode.c @@ -215,16 +215,18 @@ static inline int json_decoder_pop(ValuesStackItem obj, /// Convert JSON string into VimL object /// /// @param[in] buf String to convert. UTF-8 encoding is assumed. -/// @param[in] len Length of the string. +/// @param[in] buf_len Length of the string. /// @param[out] rettv Location where to save results. /// /// @return OK in case of success, FAIL otherwise. 
-int json_decode_string(const char *const buf, const size_t len, +int json_decode_string(const char *const buf, const size_t buf_len, typval_T *const rettv) FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT { +#define LENP(p, e) \ + ((int) ((e) - (p))), (p) const char *p = buf; - const char *const e = buf + len; + const char *const e = buf + buf_len; while (p < e && (*p == ' ' || *p == '\t' || *p == '\n')) { p++; } @@ -251,25 +253,26 @@ json_decode_string_cycle_start: case '}': case ']': { if (kv_size(container_stack) == 0) { - EMSG2(_("E474: No container to close: %s"), p); + emsgf(_("E474: No container to close: %.*s"), LENP(p, e)); goto json_decode_string_fail; } ContainerStackItem last_container = kv_last(container_stack); if (*p == '}' && last_container.container.v_type != VAR_DICT) { - EMSG2(_("E474: Closing list with curly bracket: %s"), p); + emsgf(_("E474: Closing list with curly bracket: %.*s"), LENP(p, e)); goto json_decode_string_fail; } else if (*p == ']' && last_container.container.v_type != VAR_LIST) { - EMSG2(_("E474: Closing dictionary with square bracket: %s"), p); + emsgf(_("E474: Closing dictionary with square bracket: %.*s"), + LENP(p, e)); goto json_decode_string_fail; } else if (didcomma) { - EMSG2(_("E474: Trailing comma: %s"), p); + emsgf(_("E474: Trailing comma: %.*s"), LENP(p, e)); goto json_decode_string_fail; } else if (didcolon) { - EMSG2(_("E474: Expected value after colon: %s"), p); + emsgf(_("E474: Expected value after colon: %.*s"), LENP(p, e)); goto json_decode_string_fail; } else if (last_container.stack_index != kv_size(stack) - 1) { assert(last_container.stack_index < kv_size(stack) - 1); - EMSG2(_("E474: Expected value: %s"), p); + emsgf(_("E474: Expected value: %.*s"), LENP(p, e)); goto json_decode_string_fail; } if (kv_size(stack) == 1) { @@ -288,26 +291,26 @@ json_decode_string_cycle_start: } case ',': { if (kv_size(container_stack) == 0) { - EMSG2(_("E474: Comma not inside container: %s"), p); + emsgf(_("E474: Comma not 
inside container: %.*s"), LENP(p, e)); goto json_decode_string_fail; } ContainerStackItem last_container = kv_last(container_stack); if (didcomma) { - EMSG2(_("E474: Duplicate comma: %s"), p); + emsgf(_("E474: Duplicate comma: %.*s"), LENP(p, e)); goto json_decode_string_fail; } else if (didcolon) { - EMSG2(_("E474: Comma after colon: %s"), p); + emsgf(_("E474: Comma after colon: %.*s"), LENP(p, e)); goto json_decode_string_fail; } else if (last_container.container.v_type == VAR_DICT && last_container.stack_index != kv_size(stack) - 1) { - EMSG2(_("E474: Using comma in place of colon: %s"), p); + emsgf(_("E474: Using comma in place of colon: %.*s"), LENP(p, e)); goto json_decode_string_fail; } else if (last_container.special_val == NULL ? (last_container.container.v_type == VAR_DICT ? (DICT_LEN(last_container.container.vval.v_dict) == 0) : (last_container.container.vval.v_list->lv_len == 0)) : (last_container.special_val->lv_len == 0)) { - EMSG2(_("E474: Leading comma: %s"), p); + emsgf(_("E474: Leading comma: %.*s"), LENP(p, e)); goto json_decode_string_fail; } didcomma = true; @@ -315,21 +318,21 @@ json_decode_string_cycle_start: } case ':': { if (kv_size(container_stack) == 0) { - EMSG2(_("E474: Colon not inside container: %s"), p); + emsgf(_("E474: Colon not inside container: %.*s"), LENP(p, e)); goto json_decode_string_fail; } ContainerStackItem last_container = kv_last(container_stack); if (last_container.container.v_type != VAR_DICT) { - EMSG2(_("E474: Using colon not in dictionary: %s"), p); + emsgf(_("E474: Using colon not in dictionary: %.*s"), LENP(p, e)); goto json_decode_string_fail; } else if (last_container.stack_index != kv_size(stack) - 2) { - EMSG2(_("E474: Unexpected colon: %s"), p); + emsgf(_("E474: Unexpected colon: %.*s"), LENP(p, e)); goto json_decode_string_fail; } else if (didcomma) { - EMSG2(_("E474: Colon after comma: %s"), p); + emsgf(_("E474: Colon after comma: %.*s"), LENP(p, e)); goto json_decode_string_fail; } else if (didcolon) { - 
EMSG2(_("E474: Duplicate colon: %s"), p); + emsgf(_("E474: Duplicate colon: %.*s"), LENP(p, e)); goto json_decode_string_fail; } didcolon = true; @@ -342,7 +345,7 @@ json_decode_string_cycle_start: } case 'n': { if ((p + 3) >= e || strncmp(p + 1, "ull", 3) != 0) { - EMSG2(_("E474: Expected null: %s"), p); + emsgf(_("E474: Expected null: %.*s"), LENP(p, e)); goto json_decode_string_fail; } p += 3; @@ -355,7 +358,7 @@ json_decode_string_cycle_start: } case 't': { if ((p + 3) >= e || strncmp(p + 1, "rue", 3) != 0) { - EMSG2(_("E474: Expected true: %s"), p); + emsgf(_("E474: Expected true: %.*s"), LENP(p, e)); goto json_decode_string_fail; } p += 3; @@ -368,7 +371,7 @@ json_decode_string_cycle_start: } case 'f': { if ((p + 4) >= e || strncmp(p + 1, "alse", 4) != 0) { - EMSG2(_("E474: Expected false: %s"), p); + emsgf(_("E474: Expected false: %.*s"), LENP(p, e)); goto json_decode_string_fail; } p += 4; @@ -386,20 +389,22 @@ json_decode_string_cycle_start: if (*p == '\\') { p++; if (p == e) { - EMSG2(_("E474: Unfinished escape sequence: %s"), buf); + emsgf(_("E474: Unfinished escape sequence: %.*s"), + (int) buf_len, buf); goto json_decode_string_fail; } switch (*p) { case 'u': { if (p + 4 >= e) { - EMSG2(_("E474: Unfinished unicode escape sequence: %s"), buf); + emsgf(_("E474: Unfinished unicode escape sequence: %.*s"), + (int) buf_len, buf); goto json_decode_string_fail; } else if (!ascii_isxdigit(p[1]) || !ascii_isxdigit(p[2]) || !ascii_isxdigit(p[3]) || !ascii_isxdigit(p[4])) { - EMSG2(_("E474: Expected four hex digits after \\u: %s"), - p - 1); + emsgf(_("E474: Expected four hex digits after \\u: %.*s"), + LENP(p - 1, e)); goto json_decode_string_fail; } // One UTF-8 character below U+10000 can take up to 3 bytes, @@ -421,7 +426,7 @@ json_decode_string_cycle_start: break; } default: { - EMSG2(_("E474: Unknown escape sequence: %s"), p - 1); + emsgf(_("E474: Unknown escape sequence: %.*s"), LENP(p - 1, e)); goto json_decode_string_fail; } } @@ -429,8 +434,8 @@ 
json_decode_string_cycle_start: uint8_t p_byte = (uint8_t) *p; // unescaped = %x20-21 / %x23-5B / %x5D-10FFFF if (p_byte < 0x20) { - EMSG2(_("E474: ASCII control characters cannot be present " - "inside string: %s"), p); + emsgf(_("E474: ASCII control characters cannot be present " + "inside string: %.*s"), LENP(p, e)); goto json_decode_string_fail; } const int ch = utf_ptr2char((char_u *) p); @@ -442,11 +447,11 @@ json_decode_string_cycle_start: // The only exception is U+00C3 which is represented as 0xC3 0x83. if (ch >= 0x80 && p_byte == ch && !( ch == 0xC3 && p + 1 < e && (uint8_t) p[1] == 0x83)) { - EMSG2(_("E474: Only UTF-8 strings allowed: %s"), p); + emsgf(_("E474: Only UTF-8 strings allowed: %.*s"), LENP(p, e)); goto json_decode_string_fail; } else if (ch > 0x10FFFF) { - EMSG2(_("E474: Only UTF-8 code points up to U+10FFFF " - "are allowed to appear unescaped: %s"), p); + emsgf(_("E474: Only UTF-8 code points up to U+10FFFF " + "are allowed to appear unescaped: %.*s"), LENP(p, e)); goto json_decode_string_fail; } const size_t ch_len = (size_t) utf_char2len(ch); @@ -456,7 +461,7 @@ json_decode_string_cycle_start: } } if (p == e || *p != '"') { - EMSG2(_("E474: Expected string end: %s"), buf); + emsgf(_("E474: Expected string end: %.*s"), (int) buf_len, buf); goto json_decode_string_fail; } if (len == 0) { @@ -545,7 +550,8 @@ json_decode_string_cycle_start: char *const new_str = (char *) string_convert(&conv, (char_u *) str, &str_len); if (new_str == NULL) { - EMSG2(_("E474: Failed to convert string \"%s\" from UTF-8"), str); + emsgf(_("E474: Failed to convert string \"%.*s\" from UTF-8"), + (int) str_len, str); xfree(str); goto json_decode_string_fail; } @@ -619,13 +625,13 @@ json_decode_string_cycle_start: } } if (p == ints) { - EMSG2(_("E474: Missing number after minus sign: %s"), s); + emsgf(_("E474: Missing number after minus sign: %.*s"), LENP(s, e)); goto json_decode_string_fail; } else if (p == fracs) { - EMSG2(_("E474: Missing number after decimal 
dot: %s"), s); + emsgf(_("E474: Missing number after decimal dot: %.*s"), LENP(s, e)); goto json_decode_string_fail; } else if (p == exps) { - EMSG2(_("E474: Missing exponent: %s"), s); + emsgf(_("E474: Missing exponent: %.*s"), LENP(s, e)); goto json_decode_string_fail; } typval_T tv = { @@ -694,7 +700,7 @@ json_decode_string_cycle_start: break; } default: { - EMSG2(_("E474: Unidentified byte: %s"), p); + emsgf(_("E474: Unidentified byte: %.*s"), LENP(p, e)); goto json_decode_string_fail; } } @@ -714,13 +720,13 @@ json_decode_string_after_cycle: break; } default: { - EMSG2(_("E474: Trailing characters: %s"), p); + emsgf(_("E474: Trailing characters: %.*s"), LENP(p, e)); goto json_decode_string_fail; } } } if (kv_size(stack) > 1 || kv_size(container_stack)) { - EMSG2(_("E474: Unexpected end of input: %s"), buf); + emsgf(_("E474: Unexpected end of input: %.*s"), (int) buf_len, buf); goto json_decode_string_fail; } goto json_decode_string_ret; -- cgit From 032ac502ff1378757d9ba56e5760d362570e48e4 Mon Sep 17 00:00:00 2001 From: ZyX Date: Mon, 7 Mar 2016 08:59:03 +0300 Subject: eval/decode: Do not loose high surrogates followed by high surrogates --- src/nvim/eval/decode.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/nvim/eval') diff --git a/src/nvim/eval/decode.c b/src/nvim/eval/decode.c index 266da86b74..604b758344 100644 --- a/src/nvim/eval/decode.c +++ b/src/nvim/eval/decode.c @@ -499,6 +499,7 @@ json_decode_string_cycle_start: hasnul = true; } if (SURROGATE_HI_START <= ch && ch <= SURROGATE_HI_END) { + PUT_FST_IN_PAIR(fst_in_pair, str_end); fst_in_pair = (int) ch; } else if (SURROGATE_LO_START <= ch && ch <= SURROGATE_LO_END && fst_in_pair != 0) { -- cgit From 9c543f2e246469adec1daddf156f4bcabe30931a Mon Sep 17 00:00:00 2001 From: ZyX Date: Mon, 7 Mar 2016 09:09:39 +0300 Subject: eval/decode: Reject more numbers, accept 1e5 --- src/nvim/eval/decode.c | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) (limited to 
'src/nvim/eval') diff --git a/src/nvim/eval/decode.c b/src/nvim/eval/decode.c index 604b758344..75c88e308b 100644 --- a/src/nvim/eval/decode.c +++ b/src/nvim/eval/decode.c @@ -608,11 +608,13 @@ json_decode_string_cycle_start: while (p < e && ascii_isdigit(*p)) { p++; } - if (p < e && *p == '.') { - p++; - fracs = p; - while (p < e && ascii_isdigit(*p)) { + if (p < e && p != ints && (*p == '.' || *p == 'e' || *p == 'E')) { + if (*p == '.') { p++; + fracs = p; + while (p < e && ascii_isdigit(*p)) { + p++; + } } if (p < e && (*p == 'e' || *p == 'E')) { p++; @@ -628,7 +630,7 @@ json_decode_string_cycle_start: if (p == ints) { emsgf(_("E474: Missing number after minus sign: %.*s"), LENP(s, e)); goto json_decode_string_fail; - } else if (p == fracs) { + } else if (p == fracs || exps == fracs + 1) { emsgf(_("E474: Missing number after decimal dot: %.*s"), LENP(s, e)); goto json_decode_string_fail; } else if (p == exps) { @@ -639,14 +641,26 @@ json_decode_string_cycle_start: .v_type = VAR_NUMBER, .v_lock = VAR_UNLOCKED, }; - if (fracs) { + const size_t exp_num_len = (size_t) (p - s); + if (fracs || exps) { // Convert floating-point number - (void) string2float(s, &tv.vval.v_float); + const size_t num_len = string2float(s, &tv.vval.v_float); + if (exp_num_len != num_len) { + emsgf(_("E685: internal error: while converting number \"%.*s\" " + "to float string2float consumed %zu bytes in place of %zu"), + (int) exp_num_len, s, num_len, exp_num_len); + } tv.v_type = VAR_FLOAT; } else { // Convert integer long nr; - vim_str2nr((char_u *) s, NULL, NULL, 0, &nr, NULL, (int) (p - s)); + int num_len; + vim_str2nr((char_u *) s, NULL, &num_len, 0, &nr, NULL, (int) (p - s)); + if ((int) exp_num_len != num_len) { + emsgf(_("E685: internal error: while converting number \"%.*s\" " + "to float vim_str2nr consumed %i bytes in place of %zu"), + (int) exp_num_len, s, num_len, exp_num_len); + } tv.vval.v_number = (varnumber_T) nr; } POP(tv, false); -- cgit From 
9a56fcb2e8e97dec1e4ebce1d1287e7ab8a6ee79 Mon Sep 17 00:00:00 2001 From: ZyX Date: Mon, 7 Mar 2016 09:11:09 +0300 Subject: eval/decode: Rewrite json_decode_string end as suggested by oni-link --- src/nvim/eval/decode.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'src/nvim/eval') diff --git a/src/nvim/eval/decode.c b/src/nvim/eval/decode.c index 75c88e308b..1e45336ed9 100644 --- a/src/nvim/eval/decode.c +++ b/src/nvim/eval/decode.c @@ -740,21 +740,17 @@ json_decode_string_after_cycle: } } } - if (kv_size(stack) > 1 || kv_size(container_stack)) { - emsgf(_("E474: Unexpected end of input: %.*s"), (int) buf_len, buf); - goto json_decode_string_fail; + if (kv_size(stack) == 1 && kv_size(container_stack) == 0) { + *rettv = kv_pop(stack).val; + goto json_decode_string_ret; } - goto json_decode_string_ret; + emsgf(_("E474: Unexpected end of input: %.*s"), (int) buf_len, buf); json_decode_string_fail: ret = FAIL; while (kv_size(stack)) { clear_tv(&(kv_pop(stack).val)); } json_decode_string_ret: - if (ret != FAIL) { - assert(kv_size(stack) == 1); - *rettv = kv_pop(stack).val; - } kv_destroy(stack); kv_destroy(container_stack); return ret; -- cgit From 69ce17878eb6a95e40b6e5c36c62a5ffdf2df62d Mon Sep 17 00:00:00 2001 From: ZyX Date: Mon, 7 Mar 2016 10:06:16 +0300 Subject: *: Fix linter errors --- src/nvim/eval/decode.c | 663 +++++++++++++++++++++++++++++-------------------- 1 file changed, 392 insertions(+), 271 deletions(-) (limited to 'src/nvim/eval') diff --git a/src/nvim/eval/decode.c b/src/nvim/eval/decode.c index 1e45336ed9..ec3be2cfb6 100644 --- a/src/nvim/eval/decode.c +++ b/src/nvim/eval/decode.c @@ -193,16 +193,388 @@ static inline int json_decoder_pop(ValuesStackItem obj, return OK; } -#define OBJ(obj_tv, is_sp_string) \ +#define LENP(p, e) \ + ((int) ((e) - (p))), (p) +#define OBJ(obj_tv, is_sp_string, didcomma_, didcolon_) \ ((ValuesStackItem) { \ .is_special_string = (is_sp_string), \ .val = (obj_tv), \ - .didcomma = didcomma, \ 
- .didcolon = didcolon, \ + .didcomma = (didcomma_), \ + .didcolon = (didcolon_), \ }) + #define POP(obj_tv, is_sp_string) \ do { \ - if (json_decoder_pop(OBJ(obj_tv, is_sp_string), &stack, &container_stack, \ + if (json_decoder_pop(OBJ(obj_tv, is_sp_string, *didcomma, *didcolon), \ + stack, container_stack, \ + &p, next_map_special, didcomma, didcolon) \ + == FAIL) { \ + goto parse_json_string_fail; \ + } \ + if (*next_map_special) { \ + goto parse_json_string_ret; \ + } \ + } while (0) + +/// Parse JSON double-quoted string +/// +/// @param[in] conv Defines conversion necessary to convert UTF-8 string to +/// &encoding. +/// @param[in] buf Buffer being converted. +/// @param[in] buf_len Length of the buffer. +/// @param[in,out] pp Pointer to the start of the string. Must point to '"'. +/// Is advanced to the closing '"'. +/// @param[out] stack Object stack. +/// @param[out] container_stack Container objects stack. +/// @param[out] next_map_special Is set to true when dictionary is converted +/// to a special map, otherwise not touched. +/// @param[out] didcomma True if previous token was comma. Is set to recorded +/// value when decoder is restarted, otherwise unused. +/// @param[out] didcolon True if previous token was colon. Is set to recorded +/// value when decoder is restarted, otherwise unused. +/// +/// @return OK in case of success, FAIL in case of error. 
+static inline int parse_json_string(vimconv_T *const conv, + const char *const buf, const size_t buf_len, + const char **const pp, + ValuesStack *const stack, + ContainerStack *const container_stack, + bool *const next_map_special, + bool *const didcomma, + bool *const didcolon) + FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_ALWAYS_INLINE +{ + const char *const e = buf + buf_len; + const char *p = *pp; + size_t len = 0; + const char *const s = ++p; + int ret = OK; + while (p < e && *p != '"') { + if (*p == '\\') { + p++; + if (p == e) { + emsgf(_("E474: Unfinished escape sequence: %.*s"), + (int) buf_len, buf); + goto parse_json_string_fail; + } + switch (*p) { + case 'u': { + if (p + 4 >= e) { + emsgf(_("E474: Unfinished unicode escape sequence: %.*s"), + (int) buf_len, buf); + goto parse_json_string_fail; + } else if (!ascii_isxdigit(p[1]) + || !ascii_isxdigit(p[2]) + || !ascii_isxdigit(p[3]) + || !ascii_isxdigit(p[4])) { + emsgf(_("E474: Expected four hex digits after \\u: %.*s"), + LENP(p - 1, e)); + goto parse_json_string_fail; + } + // One UTF-8 character below U+10000 can take up to 3 bytes, + // above up to 6, but they are encoded using two \u escapes. + len += 3; + p += 5; + break; + } + case '\\': + case '/': + case '"': + case 't': + case 'b': + case 'n': + case 'r': + case 'f': { + len++; + p++; + break; + } + default: { + emsgf(_("E474: Unknown escape sequence: %.*s"), LENP(p - 1, e)); + goto parse_json_string_fail; + } + } + } else { + uint8_t p_byte = (uint8_t) *p; + // unescaped = %x20-21 / %x23-5B / %x5D-10FFFF + if (p_byte < 0x20) { + emsgf(_("E474: ASCII control characters cannot be present " + "inside string: %.*s"), LENP(p, e)); + goto parse_json_string_fail; + } + const int ch = utf_ptr2char((char_u *) p); + // All characters above U+007F are encoded using two or more bytes + // and thus cannot possibly be equal to *p. 
But utf_ptr2char({0xFF, + // 0}) will return 0xFF, even though 0xFF cannot start any UTF-8 + // code point at all. + // + // The only exception is U+00C3 which is represented as 0xC3 0x83. + if (ch >= 0x80 && p_byte == ch + && !(ch == 0xC3 && p + 1 < e && (uint8_t) p[1] == 0x83)) { + emsgf(_("E474: Only UTF-8 strings allowed: %.*s"), LENP(p, e)); + goto parse_json_string_fail; + } else if (ch > 0x10FFFF) { + emsgf(_("E474: Only UTF-8 code points up to U+10FFFF " + "are allowed to appear unescaped: %.*s"), LENP(p, e)); + goto parse_json_string_fail; + } + const size_t ch_len = (size_t) utf_char2len(ch); + assert(ch_len == (size_t) (ch ? utf_ptr2len((char_u *) p) : 1)); + len += ch_len; + p += ch_len; + } + } + if (p == e || *p != '"') { + emsgf(_("E474: Expected string end: %.*s"), (int) buf_len, buf); + goto parse_json_string_fail; + } + if (len == 0) { + POP(((typval_T) { + .v_type = VAR_STRING, + .vval = { .v_string = NULL }, + }), false); + goto parse_json_string_ret; + } + char *str = xmalloc(len + 1); + int fst_in_pair = 0; + char *str_end = str; + bool hasnul = false; +#define PUT_FST_IN_PAIR(fst_in_pair, str_end) \ + do { \ + if (fst_in_pair != 0) { \ + str_end += utf_char2bytes(fst_in_pair, (char_u *) str_end); \ + fst_in_pair = 0; \ + } \ + } while (0) + for (const char *t = s; t < p; t++) { + if (t[0] != '\\' || t[1] != 'u') { + PUT_FST_IN_PAIR(fst_in_pair, str_end); + } + if (*t == '\\') { + t++; + switch (*t) { + case 'u': { + const char ubuf[] = { t[1], t[2], t[3], t[4] }; + t += 4; + unsigned long ch; + vim_str2nr((char_u *) ubuf, NULL, NULL, + STR2NR_HEX | STR2NR_FORCE, NULL, &ch, 4); + if (ch == 0) { + hasnul = true; + } + if (SURROGATE_HI_START <= ch && ch <= SURROGATE_HI_END) { + PUT_FST_IN_PAIR(fst_in_pair, str_end); + fst_in_pair = (int) ch; + } else if (SURROGATE_LO_START <= ch && ch <= SURROGATE_LO_END + && fst_in_pair != 0) { + const int full_char = ( + (int) (ch - SURROGATE_LO_START) + + ((fst_in_pair - SURROGATE_HI_START) << 10) + + 
SURROGATE_FIRST_CHAR); + str_end += utf_char2bytes(full_char, (char_u *) str_end); + fst_in_pair = 0; + } else { + PUT_FST_IN_PAIR(fst_in_pair, str_end); + str_end += utf_char2bytes((int) ch, (char_u *) str_end); + } + break; + } + case '\\': + case '/': + case '"': + case 't': + case 'b': + case 'n': + case 'r': + case 'f': { + static const char escapes[] = { + ['\\'] = '\\', + ['/'] = '/', + ['"'] = '"', + ['t'] = TAB, + ['b'] = BS, + ['n'] = NL, + ['r'] = CAR, + ['f'] = FF, + }; + *str_end++ = escapes[(int) *t]; + break; + } + default: { + assert(false); + } + } + } else { + *str_end++ = *t; + } + } + PUT_FST_IN_PAIR(fst_in_pair, str_end); +#undef PUT_FST_IN_PAIR + if (conv->vc_type != CONV_NONE) { + size_t str_len = (size_t) (str_end - str); + char *const new_str = (char *) string_convert(conv, (char_u *) str, + &str_len); + if (new_str == NULL) { + emsgf(_("E474: Failed to convert string \"%.*s\" from UTF-8"), + (int) str_len, str); + xfree(str); + goto parse_json_string_fail; + } + xfree(str); + str = new_str; + str_end = new_str + str_len; + } + if (hasnul) { + typval_T obj; + list_T *const list = list_alloc(); + list->lv_refcount++; + create_special_dict(&obj, kMPString, ((typval_T) { + .v_type = VAR_LIST, + .v_lock = VAR_UNLOCKED, + .vval = { .v_list = list }, + })); + if (encode_list_write((void *) list, str, (size_t) (str_end - str)) + == -1) { + clear_tv(&obj); + goto parse_json_string_fail; + } + xfree(str); + POP(obj, true); + } else { + *str_end = NUL; + POP(((typval_T) { + .v_type = VAR_STRING, + .vval = { .v_string = (char_u *) str }, + }), false); + } + goto parse_json_string_ret; +parse_json_string_fail: + ret = FAIL; +parse_json_string_ret: + *pp = p; + return ret; +} + +#undef POP + +/// Parse JSON number: both floating-point and integer +/// +/// Number format: `-?\d+(?:.\d+)?(?:[eE][+-]?\d+)?`. +/// +/// @param[in] buf Buffer being converted. +/// @param[in] buf_len Length of the buffer. 
+/// @param[in,out] pp Pointer to the start of the number. Must point to +/// a digit or a minus sign. Is advanced to the last +/// character of the number. +/// @param[out] stack Object stack. +/// @param[out] container_stack Container objects stack. +/// @param[out] next_map_special Is set to true when dictionary is converted +/// to a special map, otherwise not touched. +/// @param[out] didcomma True if previous token was comma. Is set to recorded +/// value when decoder is restarted, otherwise unused. +/// @param[out] didcolon True if previous token was colon. Is set to recorded +/// value when decoder is restarted, otherwise unused. +/// +/// @return OK in case of success, FAIL in case of error. +static inline int parse_json_number(const char *const buf, const size_t buf_len, + const char **const pp, + ValuesStack *const stack, + ContainerStack *const container_stack, + bool *const next_map_special, + bool *const didcomma, + bool *const didcolon) + FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_ALWAYS_INLINE +{ + const char *const e = buf + buf_len; + const char *p = *pp; + int ret = OK; + const char *const s = p; + const char *ints = NULL; + const char *fracs = NULL; + const char *exps = NULL; + if (*p == '-') { + p++; + } + ints = p; + while (p < e && ascii_isdigit(*p)) { + p++; + } + if (p < e && p != ints && (*p == '.' 
|| *p == 'e' || *p == 'E')) { + if (*p == '.') { + p++; + fracs = p; + while (p < e && ascii_isdigit(*p)) { + p++; + } + } + if (p < e && (*p == 'e' || *p == 'E')) { + p++; + if (p < e && (*p == '-' || *p == '+')) { + p++; + } + exps = p; + while (p < e && ascii_isdigit(*p)) { + p++; + } + } + } + if (p == ints) { + emsgf(_("E474: Missing number after minus sign: %.*s"), LENP(s, e)); + goto parse_json_number_fail; + } else if (p == fracs || exps == fracs + 1) { + emsgf(_("E474: Missing number after decimal dot: %.*s"), LENP(s, e)); + goto parse_json_number_fail; + } else if (p == exps) { + emsgf(_("E474: Missing exponent: %.*s"), LENP(s, e)); + goto parse_json_number_fail; + } + typval_T tv = { + .v_type = VAR_NUMBER, + .v_lock = VAR_UNLOCKED, + }; + const size_t exp_num_len = (size_t) (p - s); + if (fracs || exps) { + // Convert floating-point number + const size_t num_len = string2float(s, &tv.vval.v_float); + if (exp_num_len != num_len) { + emsgf(_("E685: internal error: while converting number \"%.*s\" " + "to float string2float consumed %zu bytes in place of %zu"), + (int) exp_num_len, s, num_len, exp_num_len); + } + tv.v_type = VAR_FLOAT; + } else { + // Convert integer + long nr; + int num_len; + vim_str2nr((char_u *) s, NULL, &num_len, 0, &nr, NULL, (int) (p - s)); + if ((int) exp_num_len != num_len) { + emsgf(_("E685: internal error: while converting number \"%.*s\" " + "to float vim_str2nr consumed %i bytes in place of %zu"), + (int) exp_num_len, s, num_len, exp_num_len); + } + tv.vval.v_number = (varnumber_T) nr; + } + if (json_decoder_pop(OBJ(tv, false, *didcomma, *didcolon), + stack, container_stack, + &p, next_map_special, didcomma, didcolon) == FAIL) { + goto parse_json_number_fail; + } + if (*next_map_special) { + goto parse_json_number_ret; + } + p--; + goto parse_json_number_ret; +parse_json_number_fail: + ret = FAIL; +parse_json_number_ret: + *pp = p; + return ret; +} + +#define POP(obj_tv, is_sp_string) \ + do { \ + if 
(json_decoder_pop(OBJ(obj_tv, is_sp_string, didcomma, didcolon), \ + &stack, &container_stack, \ &p, &next_map_special, &didcomma, &didcolon) \ == FAIL) { \ goto json_decode_string_fail; \ @@ -223,8 +595,6 @@ int json_decode_string(const char *const buf, const size_t buf_len, typval_T *const rettv) FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT { -#define LENP(p, e) \ - ((int) ((e) - (p))), (p) const char *p = buf; const char *const e = buf + buf_len; while (p < e && (*p == ' ' || *p == '\t' || *p == '\n')) { @@ -383,205 +753,14 @@ json_decode_string_cycle_start: break; } case '"': { - size_t len = 0; - const char *const s = ++p; - while (p < e && *p != '"') { - if (*p == '\\') { - p++; - if (p == e) { - emsgf(_("E474: Unfinished escape sequence: %.*s"), - (int) buf_len, buf); - goto json_decode_string_fail; - } - switch (*p) { - case 'u': { - if (p + 4 >= e) { - emsgf(_("E474: Unfinished unicode escape sequence: %.*s"), - (int) buf_len, buf); - goto json_decode_string_fail; - } else if (!ascii_isxdigit(p[1]) - || !ascii_isxdigit(p[2]) - || !ascii_isxdigit(p[3]) - || !ascii_isxdigit(p[4])) { - emsgf(_("E474: Expected four hex digits after \\u: %.*s"), - LENP(p - 1, e)); - goto json_decode_string_fail; - } - // One UTF-8 character below U+10000 can take up to 3 bytes, - // above up to 6, but they are encoded using two \u escapes. 
- len += 3; - p += 5; - break; - } - case '\\': - case '/': - case '"': - case 't': - case 'b': - case 'n': - case 'r': - case 'f': { - len++; - p++; - break; - } - default: { - emsgf(_("E474: Unknown escape sequence: %.*s"), LENP(p - 1, e)); - goto json_decode_string_fail; - } - } - } else { - uint8_t p_byte = (uint8_t) *p; - // unescaped = %x20-21 / %x23-5B / %x5D-10FFFF - if (p_byte < 0x20) { - emsgf(_("E474: ASCII control characters cannot be present " - "inside string: %.*s"), LENP(p, e)); - goto json_decode_string_fail; - } - const int ch = utf_ptr2char((char_u *) p); - // All characters above U+007F are encoded using two or more bytes - // and thus cannot possibly be equal to *p. But utf_ptr2char({0xFF, - // 0}) will return 0xFF, even though 0xFF cannot start any UTF-8 - // code point at all. - // - // The only exception is U+00C3 which is represented as 0xC3 0x83. - if (ch >= 0x80 && p_byte == ch && !( - ch == 0xC3 && p + 1 < e && (uint8_t) p[1] == 0x83)) { - emsgf(_("E474: Only UTF-8 strings allowed: %.*s"), LENP(p, e)); - goto json_decode_string_fail; - } else if (ch > 0x10FFFF) { - emsgf(_("E474: Only UTF-8 code points up to U+10FFFF " - "are allowed to appear unescaped: %.*s"), LENP(p, e)); - goto json_decode_string_fail; - } - const size_t ch_len = (size_t) utf_char2len(ch); - assert(ch_len == (size_t) (ch ? 
utf_ptr2len((char_u *) p) : 1)); - len += ch_len; - p += ch_len; - } - } - if (p == e || *p != '"') { - emsgf(_("E474: Expected string end: %.*s"), (int) buf_len, buf); + if (parse_json_string(&conv, buf, buf_len, &p, &stack, &container_stack, + &next_map_special, &didcomma, &didcolon) + == FAIL) { + // Error message was already given goto json_decode_string_fail; } - if (len == 0) { - POP(((typval_T) { - .v_type = VAR_STRING, - .vval = { .v_string = NULL }, - }), false); - break; - } - char *str = xmalloc(len + 1); - int fst_in_pair = 0; - char *str_end = str; - bool hasnul = false; -#define PUT_FST_IN_PAIR(fst_in_pair, str_end) \ - do { \ - if (fst_in_pair != 0) { \ - str_end += utf_char2bytes(fst_in_pair, (char_u *) str_end); \ - fst_in_pair = 0; \ - } \ - } while (0) - for (const char *t = s; t < p; t++) { - if (t[0] != '\\' || t[1] != 'u') { - PUT_FST_IN_PAIR(fst_in_pair, str_end); - } - if (*t == '\\') { - t++; - switch (*t) { - case 'u': { - const char ubuf[] = { t[1], t[2], t[3], t[4] }; - t += 4; - unsigned long ch; - vim_str2nr((char_u *) ubuf, NULL, NULL, - STR2NR_HEX | STR2NR_FORCE, NULL, &ch, 4); - if (ch == 0) { - hasnul = true; - } - if (SURROGATE_HI_START <= ch && ch <= SURROGATE_HI_END) { - PUT_FST_IN_PAIR(fst_in_pair, str_end); - fst_in_pair = (int) ch; - } else if (SURROGATE_LO_START <= ch && ch <= SURROGATE_LO_END - && fst_in_pair != 0) { - const int full_char = ( - (int) (ch - SURROGATE_LO_START) - + ((fst_in_pair - SURROGATE_HI_START) << 10) - + SURROGATE_FIRST_CHAR); - str_end += utf_char2bytes(full_char, (char_u *) str_end); - fst_in_pair = 0; - } else { - PUT_FST_IN_PAIR(fst_in_pair, str_end); - str_end += utf_char2bytes((int) ch, (char_u *) str_end); - } - break; - } - case '\\': - case '/': - case '"': - case 't': - case 'b': - case 'n': - case 'r': - case 'f': { - static const char escapes[] = { - ['\\'] = '\\', - ['/'] = '/', - ['"'] = '"', - ['t'] = TAB, - ['b'] = BS, - ['n'] = NL, - ['r'] = CAR, - ['f'] = FF, - }; - *str_end++ = 
escapes[(int) *t]; - break; - } - default: { - assert(false); - } - } - } else { - *str_end++ = *t; - } - } - PUT_FST_IN_PAIR(fst_in_pair, str_end); -#undef PUT_FST_IN_PAIR - if (conv.vc_type != CONV_NONE) { - size_t str_len = (size_t) (str_end - str); - char *const new_str = (char *) string_convert(&conv, (char_u *) str, - &str_len); - if (new_str == NULL) { - emsgf(_("E474: Failed to convert string \"%.*s\" from UTF-8"), - (int) str_len, str); - xfree(str); - goto json_decode_string_fail; - } - xfree(str); - str = new_str; - str_end = new_str + str_len; - } - if (hasnul) { - typval_T obj; - list_T *const list = list_alloc(); - list->lv_refcount++; - create_special_dict(&obj, kMPString, ((typval_T) { - .v_type = VAR_LIST, - .v_lock = VAR_UNLOCKED, - .vval = { .v_list = list }, - })); - if (encode_list_write((void *) list, str, (size_t) (str_end - str)) - == -1) { - clear_tv(&obj); - goto json_decode_string_fail; - } - xfree(str); - POP(obj, true); - } else { - *str_end = NUL; - POP(((typval_T) { - .v_type = VAR_STRING, - .vval = { .v_string = (char_u *) str }, - }), false); + if (next_map_special) { + goto json_decode_string_cycle_start; } break; } @@ -596,75 +775,15 @@ json_decode_string_cycle_start: case '7': case '8': case '9': { - // a.bE[+-]exp - const char *const s = p; - const char *ints = NULL; - const char *fracs = NULL; - const char *exps = NULL; - if (*p == '-') { - p++; - } - ints = p; - while (p < e && ascii_isdigit(*p)) { - p++; - } - if (p < e && p != ints && (*p == '.' 
|| *p == 'e' || *p == 'E')) { - if (*p == '.') { - p++; - fracs = p; - while (p < e && ascii_isdigit(*p)) { - p++; - } - } - if (p < e && (*p == 'e' || *p == 'E')) { - p++; - if (p < e && (*p == '-' || *p == '+')) { - p++; - } - exps = p; - while (p < e && ascii_isdigit(*p)) { - p++; - } - } - } - if (p == ints) { - emsgf(_("E474: Missing number after minus sign: %.*s"), LENP(s, e)); - goto json_decode_string_fail; - } else if (p == fracs || exps == fracs + 1) { - emsgf(_("E474: Missing number after decimal dot: %.*s"), LENP(s, e)); - goto json_decode_string_fail; - } else if (p == exps) { - emsgf(_("E474: Missing exponent: %.*s"), LENP(s, e)); + if (parse_json_number(buf, buf_len, &p, &stack, &container_stack, + &next_map_special, &didcomma, &didcolon) + == FAIL) { + // Error message was already given goto json_decode_string_fail; } - typval_T tv = { - .v_type = VAR_NUMBER, - .v_lock = VAR_UNLOCKED, - }; - const size_t exp_num_len = (size_t) (p - s); - if (fracs || exps) { - // Convert floating-point number - const size_t num_len = string2float(s, &tv.vval.v_float); - if (exp_num_len != num_len) { - emsgf(_("E685: internal error: while converting number \"%.*s\" " - "to float string2float consumed %zu bytes in place of %zu"), - (int) exp_num_len, s, num_len, exp_num_len); - } - tv.v_type = VAR_FLOAT; - } else { - // Convert integer - long nr; - int num_len; - vim_str2nr((char_u *) s, NULL, &num_len, 0, &nr, NULL, (int) (p - s)); - if ((int) exp_num_len != num_len) { - emsgf(_("E685: internal error: while converting number \"%.*s\" " - "to float vim_str2nr consumed %i bytes in place of %zu"), - (int) exp_num_len, s, num_len, exp_num_len); - } - tv.vval.v_number = (varnumber_T) nr; + if (next_map_special) { + goto json_decode_string_cycle_start; } - POP(tv, false); - p--; break; } case '[': { @@ -681,7 +800,7 @@ json_decode_string_cycle_start: .container = tv, .special_val = NULL, })); - kv_push(ValuesStackItem, stack, OBJ(tv, false)); + kv_push(ValuesStackItem, 
stack, OBJ(tv, false, didcomma, didcolon)); break; } case '{': { @@ -711,7 +830,7 @@ json_decode_string_cycle_start: .container = tv, .special_val = val_list, })); - kv_push(ValuesStackItem, stack, OBJ(tv, false)); + kv_push(ValuesStackItem, stack, OBJ(tv, false, didcomma, didcolon)); break; } default: { @@ -756,7 +875,9 @@ json_decode_string_ret: return ret; } +#undef LENP #undef POP + #undef OBJ #undef DICT_LEN -- cgit From 515fea1ef09e3debee9e226f34d3e62e47e8a08d Mon Sep 17 00:00:00 2001 From: ZyX Date: Wed, 9 Mar 2016 02:08:53 +0300 Subject: eval/decode: Reject even more numbers Rejects leading zeroes and numbers like 1.e+5 (decimal dot with missing number with signed exponent). --- src/nvim/eval/decode.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'src/nvim/eval') diff --git a/src/nvim/eval/decode.c b/src/nvim/eval/decode.c index ec3be2cfb6..f74f2b3150 100644 --- a/src/nvim/eval/decode.c +++ b/src/nvim/eval/decode.c @@ -492,6 +492,7 @@ static inline int parse_json_number(const char *const buf, const size_t buf_len, const char *ints = NULL; const char *fracs = NULL; const char *exps = NULL; + const char *exps_s = NULL; if (*p == '-') { p++; } @@ -499,6 +500,10 @@ static inline int parse_json_number(const char *const buf, const size_t buf_len, while (p < e && ascii_isdigit(*p)) { p++; } + if (p != ints + 1 && *ints == '0') { + emsgf(_("E474: Leading zeroes are not allowed: %.*s"), LENP(s, e)); + goto parse_json_number_fail; + } if (p < e && p != ints && (*p == '.' 
|| *p == 'e' || *p == 'E')) { if (*p == '.') { p++; @@ -509,6 +514,7 @@ static inline int parse_json_number(const char *const buf, const size_t buf_len, } if (p < e && (*p == 'e' || *p == 'E')) { p++; + exps_s = p; if (p < e && (*p == '-' || *p == '+')) { p++; } @@ -521,7 +527,7 @@ static inline int parse_json_number(const char *const buf, const size_t buf_len, if (p == ints) { emsgf(_("E474: Missing number after minus sign: %.*s"), LENP(s, e)); goto parse_json_number_fail; - } else if (p == fracs || exps == fracs + 1) { + } else if (p == fracs || exps_s == fracs + 1) { emsgf(_("E474: Missing number after decimal dot: %.*s"), LENP(s, e)); goto parse_json_number_fail; } else if (p == exps) { -- cgit From 0c598774d8f6358f9cdf86a56cbe1355b503907f Mon Sep 17 00:00:00 2001 From: ZyX Date: Wed, 9 Mar 2016 02:10:53 +0300 Subject: eval/decode: Fix typo in internal error message --- src/nvim/eval/decode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/nvim/eval') diff --git a/src/nvim/eval/decode.c b/src/nvim/eval/decode.c index f74f2b3150..b31b21b4da 100644 --- a/src/nvim/eval/decode.c +++ b/src/nvim/eval/decode.c @@ -555,7 +555,7 @@ static inline int parse_json_number(const char *const buf, const size_t buf_len, vim_str2nr((char_u *) s, NULL, &num_len, 0, &nr, NULL, (int) (p - s)); if ((int) exp_num_len != num_len) { emsgf(_("E685: internal error: while converting number \"%.*s\" " - "to float vim_str2nr consumed %i bytes in place of %zu"), + "to integer vim_str2nr consumed %i bytes in place of %zu"), (int) exp_num_len, s, num_len, exp_num_len); } tv.vval.v_number = (varnumber_T) nr; -- cgit From 2b0d46195be0792791171aa23d04ee7ba31c54c9 Mon Sep 17 00:00:00 2001 From: ZyX Date: Wed, 9 Mar 2016 02:28:12 +0300 Subject: eval/decode: Clarify meaning of some pointer arguments --- src/nvim/eval/decode.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'src/nvim/eval') diff --git a/src/nvim/eval/decode.c 
b/src/nvim/eval/decode.c index b31b21b4da..1303e288c3 100644 --- a/src/nvim/eval/decode.c +++ b/src/nvim/eval/decode.c @@ -81,8 +81,10 @@ static inline void create_special_dict(typval_T *const rettv, /// for error reporting and is also set when decoding is /// restarted due to the necessity of converting regular /// dictionary to a special map. -/// @param[out] next_map_special Is set to true when dictionary is converted -/// to a special map, otherwise not touched. +/// @param[out] next_map_special Is set to true when dictionary needs to be +/// converted to a special map, otherwise not +/// touched. Indicates that decoding has been +/// restarted. /// @param[out] didcomma True if previous token was comma. Is set to recorded /// value when decoder is restarted, otherwise unused. /// @param[out] didcolon True if previous token was colon. Is set to recorded @@ -223,7 +225,9 @@ static inline int json_decoder_pop(ValuesStackItem obj, /// @param[in] buf Buffer being converted. /// @param[in] buf_len Length of the buffer. /// @param[in,out] pp Pointer to the start of the string. Must point to '"'. -/// Is advanced to the closing '"'. +/// Is advanced to the closing '"'. Also see +/// json_decoder_pop(), it may set pp to another location +/// and alter next_map_special, didcomma and didcolon. /// @param[out] stack Object stack. /// @param[out] container_stack Container objects stack. /// @param[out] next_map_special Is set to true when dictionary is converted @@ -465,7 +469,9 @@ parse_json_string_ret: /// @param[in] buf_len Length of the buffer. /// @param[in,out] pp Pointer to the start of the number. Must point to /// a digit or a minus sign. Is advanced to the last -/// character of the number. +/// character of the number. Also see json_decoder_pop(), it +/// may set pp to another location and alter +/// next_map_special, didcomma and didcolon. /// @param[out] stack Object stack. /// @param[out] container_stack Container objects stack. 
/// @param[out] next_map_special Is set to true when dictionary is converted -- cgit From d06c2a1b1846a96a45625ad5472a235b2d249933 Mon Sep 17 00:00:00 2001 From: ZyX Date: Thu, 10 Mar 2016 01:06:43 +0300 Subject: eval/decode: Do not overflow when parsing `-` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Also makes if’s less nested. --- src/nvim/eval/decode.c | 38 +++++++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 15 deletions(-) (limited to 'src/nvim/eval') diff --git a/src/nvim/eval/decode.c b/src/nvim/eval/decode.c index 1303e288c3..2e9bf8fbac 100644 --- a/src/nvim/eval/decode.c +++ b/src/nvim/eval/decode.c @@ -503,6 +503,9 @@ static inline int parse_json_number(const char *const buf, const size_t buf_len, p++; } ints = p; + if (p >= e) { + goto parse_json_number_check; + } while (p < e && ascii_isdigit(*p)) { p++; } @@ -510,26 +513,31 @@ static inline int parse_json_number(const char *const buf, const size_t buf_len, emsgf(_("E474: Leading zeroes are not allowed: %.*s"), LENP(s, e)); goto parse_json_number_fail; } - if (p < e && p != ints && (*p == '.' 
|| *p == 'e' || *p == 'E')) { - if (*p == '.') { + if (p >= e || p == ints) { + goto parse_json_number_check; + } + if (*p == '.') { + p++; + fracs = p; + while (p < e && ascii_isdigit(*p)) { p++; - fracs = p; - while (p < e && ascii_isdigit(*p)) { - p++; - } } - if (p < e && (*p == 'e' || *p == 'E')) { + if (p >= e || p == fracs) { + goto parse_json_number_check; + } + } + if (*p == 'e' || *p == 'E') { + p++; + exps_s = p; + if (p < e && (*p == '-' || *p == '+')) { + p++; + } + exps = p; + while (p < e && ascii_isdigit(*p)) { p++; - exps_s = p; - if (p < e && (*p == '-' || *p == '+')) { - p++; - } - exps = p; - while (p < e && ascii_isdigit(*p)) { - p++; - } } } +parse_json_number_check: if (p == ints) { emsgf(_("E474: Missing number after minus sign: %.*s"), LENP(s, e)); goto parse_json_number_fail; -- cgit From c129f6cfafc77d3f6e22b2ac11b5c8f2cec033d3 Mon Sep 17 00:00:00 2001 From: ZyX Date: Sat, 12 Mar 2016 13:47:34 +0300 Subject: eval/decode: Accept `\r` as space character --- src/nvim/eval/decode.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'src/nvim/eval') diff --git a/src/nvim/eval/decode.c b/src/nvim/eval/decode.c index 2e9bf8fbac..10dd36c137 100644 --- a/src/nvim/eval/decode.c +++ b/src/nvim/eval/decode.c @@ -617,7 +617,7 @@ int json_decode_string(const char *const buf, const size_t buf_len, { const char *p = buf; const char *const e = buf + buf_len; - while (p < e && (*p == ' ' || *p == '\t' || *p == '\n')) { + while (p < e && (*p == ' ' || *p == TAB || *p == NL || *p == CAR)) { p++; } if (p == e) { @@ -730,7 +730,8 @@ json_decode_string_cycle_start: } case ' ': case TAB: - case NL: { + case NL: + case CAR: { continue; } case 'n': { @@ -870,7 +871,8 @@ json_decode_string_after_cycle: switch (*p) { case NL: case ' ': - case TAB: { + case TAB: + case CAR: { break; } default: { -- cgit From 494b1c9beef3755916048df29755d3d014902191 Mon Sep 17 00:00:00 2001 From: ZyX Date: Sun, 20 Mar 2016 21:31:49 +0300 Subject: *: Make 
set_vim_var_\* functions have proper argument types --- src/nvim/eval/decode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/nvim/eval') diff --git a/src/nvim/eval/decode.c b/src/nvim/eval/decode.c index 10dd36c137..0774ef515f 100644 --- a/src/nvim/eval/decode.c +++ b/src/nvim/eval/decode.c @@ -575,8 +575,8 @@ parse_json_number_check: tv.vval.v_number = (varnumber_T) nr; } if (json_decoder_pop(OBJ(tv, false, *didcomma, *didcolon), - stack, container_stack, - &p, next_map_special, didcomma, didcolon) == FAIL) { + stack, container_stack, + &p, next_map_special, didcomma, didcolon) == FAIL) { goto parse_json_number_fail; } if (*next_map_special) { -- cgit From fd92e648ac206340752c420ad639f2a6dab2a579 Mon Sep 17 00:00:00 2001 From: ZyX Date: Sun, 20 Mar 2016 23:24:53 +0300 Subject: eval/encode: Dump FF character correctly --- src/nvim/eval/encode.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/nvim/eval') diff --git a/src/nvim/eval/encode.c b/src/nvim/eval/encode.c index d21347cca6..c3941924e1 100644 --- a/src/nvim/eval/encode.c +++ b/src/nvim/eval/encode.c @@ -866,6 +866,7 @@ static const char escapes[][3] = { [CAR] = "\\r", ['"'] = "\\\"", ['\\'] = "\\\\", + [FF] = "\\f", }; static const char xdigits[] = "0123456789ABCDEF"; -- cgit From c4f1b5a9383c00e0a23fdfdca096c569f05e8a1c Mon Sep 17 00:00:00 2001 From: ZyX Date: Sat, 2 Apr 2016 01:18:58 +0300 Subject: eval/encode: Adjust buffer sizes passed to vim_snprintf --- src/nvim/eval/encode.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'src/nvim/eval') diff --git a/src/nvim/eval/encode.c b/src/nvim/eval/encode.c index c3941924e1..0bde6562b8 100644 --- a/src/nvim/eval/encode.c +++ b/src/nvim/eval/encode.c @@ -658,7 +658,7 @@ encode_vim_to_##name##_error_ret: \ #define CONV_NUMBER(num) \ do { \ char numbuf[NUMBUFLEN]; \ - vim_snprintf(numbuf, NUMBUFLEN - 1, "%" PRId64, (int64_t) (num)); \ + vim_snprintf(numbuf, ARRAY_SIZE(numbuf), "%" PRId64, (int64_t) 
(num)); \ ga_concat(gap, numbuf); \ } while (0) @@ -679,7 +679,7 @@ encode_vim_to_##name##_error_ret: \ } \ default: { \ char numbuf[NUMBUFLEN]; \ - vim_snprintf(numbuf, NUMBUFLEN - 1, "%g", flt_); \ + vim_snprintf(numbuf, ARRAY_SIZE(numbuf), "%g", flt_); \ ga_concat(gap, (char_u *) numbuf); \ } \ } \ @@ -754,7 +754,7 @@ encode_vim_to_##name##_error_ret: \ } \ } \ } \ - vim_snprintf(ebuf, NUMBUFLEN + 6, "{E724@%zu}", backref); \ + vim_snprintf(ebuf, ARRAY_SIZE(ebuf), "{E724@%zu}", backref); \ ga_concat(gap, &ebuf[0]); \ return OK; \ } while (0) @@ -783,9 +783,9 @@ DEFINE_VIML_CONV_FUNCTIONS(static, string, garray_T *const, gap) } \ } \ if (conv_type == kMPConvDict) { \ - vim_snprintf(ebuf, NUMBUFLEN + 6, "{...@%zu}", backref); \ + vim_snprintf(ebuf, ARRAY_SIZE(ebuf), "{...@%zu}", backref); \ } else { \ - vim_snprintf(ebuf, NUMBUFLEN + 6, "[...@%zu]", backref); \ + vim_snprintf(ebuf, ARRAY_SIZE(ebuf), "[...@%zu]", backref); \ } \ ga_concat(gap, &ebuf[0]); \ return OK; \ @@ -821,7 +821,7 @@ DEFINE_VIML_CONV_FUNCTIONS(, echo, garray_T *const, gap) #define CONV_UNSIGNED_NUMBER(num) \ do { \ char numbuf[NUMBUFLEN]; \ - vim_snprintf(numbuf, sizeof(numbuf), "%" PRIu64, (num)); \ + vim_snprintf(numbuf, ARRAY_SIZE(numbuf), "%" PRIu64, (num)); \ ga_concat(gap, numbuf); \ } while (0) @@ -840,7 +840,7 @@ DEFINE_VIML_CONV_FUNCTIONS(, echo, garray_T *const, gap) } \ default: { \ char numbuf[NUMBUFLEN]; \ - vim_snprintf(numbuf, NUMBUFLEN - 1, "%g", flt_); \ + vim_snprintf(numbuf, ARRAY_SIZE(numbuf), "%g", flt_); \ ga_concat(gap, (char_u *) numbuf); \ break; \ } \ -- cgit From bda0165514a582978c2da672b528562df78a2d1a Mon Sep 17 00:00:00 2001 From: ZyX Date: Mon, 4 Apr 2016 04:53:07 +0300 Subject: eval/encode: Make sure that encoder can encode NULL variables Adds two undocumented v: variables: _null_list and _null_dict because I do not know a reproducible way to get such lists (though I think I heard about this) and dictionaries (do not remember hearing about them). 
NULL strings are obtained using $XXX_UNEXISTENT_VAR_XXX. Fixes crash in json_encode($XXX_UNEXISTENT_VAR_XXX). Other added tests worked fine before this commit. --- src/nvim/eval/encode.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src/nvim/eval') diff --git a/src/nvim/eval/encode.c b/src/nvim/eval/encode.c index 0bde6562b8..88c731e92a 100644 --- a/src/nvim/eval/encode.c +++ b/src/nvim/eval/encode.c @@ -287,6 +287,9 @@ int encode_read_from_list(ListReaderState *const state, char *const buf, (val)->copyID_attr = copyID; \ } while (0) +#define TV_STRLEN(tv) \ + (tv->vval.v_string == NULL ? 0 : STRLEN(tv->vval.v_string)) + /// Define functions which convert VimL value to something else /// /// Creates function `vim_to_{name}(firstargtype firstargname, typval_T *const @@ -306,7 +309,7 @@ static int name##_convert_one_value(firstargtype firstargname, \ { \ switch (tv->v_type) { \ case VAR_STRING: { \ - CONV_STRING(tv->vval.v_string, STRLEN(tv->vval.v_string)); \ + CONV_STRING(tv->vval.v_string, TV_STRLEN(tv)); \ break; \ } \ case VAR_NUMBER: { \ -- cgit From 45304b482cbeae01af3a89358d96b0d1511213c3 Mon Sep 17 00:00:00 2001 From: ZyX Date: Mon, 4 Apr 2016 04:58:21 +0300 Subject: eval/encode: Simplify loop in encode_list_write Patch made up by oni-link. 
--- src/nvim/eval/encode.c | 48 +++++++++++++++++++++++++----------------------- 1 file changed, 25 insertions(+), 23 deletions(-) (limited to 'src/nvim/eval') diff --git a/src/nvim/eval/encode.c b/src/nvim/eval/encode.c index 88c731e92a..c651a50be9 100644 --- a/src/nvim/eval/encode.c +++ b/src/nvim/eval/encode.c @@ -75,36 +75,38 @@ int encode_list_write(void *data, const char *buf, size_t len) const char *const end = buf + len; const char *line_end = buf; listitem_T *li = list->lv_last; - do { + + // Continue the last list element + if (li != NULL) { + line_end = xmemscan(buf, NL, len); + if (line_end != buf) { + const size_t line_length = (size_t)(line_end - buf); + char *str = (char *)li->li_tv.vval.v_string; + const size_t li_len = (str == NULL ? 0 : strlen(str)); + li->li_tv.vval.v_string = xrealloc(str, li_len + line_length + 1); + str = (char *)li->li_tv.vval.v_string + li_len; + memcpy(str, buf, line_length); + str[line_length] = 0; + memchrsub(str, NUL, NL, line_length); + } + line_end++; + } + + while (line_end < end) { const char *line_start = line_end; line_end = xmemscan(line_start, NL, (size_t) (end - line_start)); char *str = NULL; if (line_end != line_start) { - const size_t line_length = (size_t) (line_end - line_start); - if (li == NULL) { - str = xmemdupz(line_start, line_length); - } else { - const size_t li_len = (li->li_tv.vval.v_string == NULL - ? 
0 - : STRLEN(li->li_tv.vval.v_string)); - li->li_tv.vval.v_string = xrealloc(li->li_tv.vval.v_string, - li_len + line_length + 1); - str = (char *) li->li_tv.vval.v_string + li_len; - memcpy(str, line_start, line_length); - str[line_length] = 0; - } + const size_t line_length = (size_t)(line_end - line_start); + str = xmemdupz(line_start, line_length); memchrsub(str, NUL, NL, line_length); } - if (li == NULL) { - list_append_allocated_string(list, str); - } else { - li = NULL; - } - if (line_end == end - 1) { - list_append_allocated_string(list, NULL); - } + list_append_allocated_string(list, str); line_end++; - } while (line_end < end); + } + if (line_end == end) { + list_append_allocated_string(list, NULL); + } return 0; } -- cgit