diff options
-rw-r--r-- | src/nvim/eval/encode.c | 20 | ||||
-rw-r--r-- | src/nvim/message.c | 22 | ||||
-rw-r--r-- | test/functional/eval/json_functions_spec.lua | 7 |
3 files changed, 38 insertions, 11 deletions
diff --git a/src/nvim/eval/encode.c b/src/nvim/eval/encode.c index 6026189235..6fa22bfc5c 100644 --- a/src/nvim/eval/encode.c +++ b/src/nvim/eval/encode.c @@ -895,7 +895,8 @@ static inline int convert_to_json_string(garray_T *const gap, if (p_enc_conv.vc_type != CONV_NONE) { tofree = string_convert(&p_enc_conv, buf, &utf_len); if (tofree == NULL) { - EMSG2(_("E474: Failed to convert string \"%s\" to UTF-8"), utf_buf); + emsgf(_("E474: Failed to convert string \"%.*s\" to UTF-8"), + utf_len, utf_buf); return FAIL; } utf_buf = tofree; @@ -930,18 +931,21 @@ static inline int convert_to_json_string(garray_T *const gap, } default: { if (ch > 0x7F && shift == 1) { - EMSG2(_("E474: String \"%s\" contains byte that does not start any " - "UTF-8 character"), utf_buf); + emsgf(_("E474: String \"%.*s\" contains byte that does not start " + "any UTF-8 character"), + utf_len - (i - shift), utf_buf + i - shift); return FAIL; - } else if ((0xD800 <= ch && ch <= 0xDB7F) - || (0xDC00 <= ch && ch <= 0xDFFF)) { - EMSG2(_("E474: UTF-8 string contains code point which belongs " - "to surrogate pairs: %s"), utf_buf + i); + } else if ((SURROGATE_HI_START <= ch && ch <= SURROGATE_HI_END) + || (SURROGATE_LO_START <= ch && ch <= SURROGATE_LO_END)) { + emsgf(_("E474: UTF-8 string contains code point which belongs " + "to a surrogate pair: %.*s"), + utf_len - (i - shift), utf_buf + i - shift); return FAIL; } else if (ENCODE_RAW(p_enc_conv, ch)) { str_len += shift; } else { - str_len += ((sizeof("\\u1234") - 1) * (size_t) (1 + (ch > 0xFFFF))); + str_len += ((sizeof("\\u1234") - 1) + * (size_t) (1 + (ch >= SURROGATE_FIRST_CHAR))); } break; } diff --git a/src/nvim/message.c b/src/nvim/message.c index 1dd71baaa4..c4207fbe9e 100644 --- a/src/nvim/message.c +++ b/src/nvim/message.c @@ -609,6 +609,21 @@ int emsgu(char_u *s, uint64_t n) return emsg(IObuff); } +/// Print an error message with unknown number of arguments +bool emsgf(const char *const fmt, ...) +{ + if (emsg_not_now()) { + return true; + } + + va_list ap; + va_start(ap, fmt); + vim_vsnprintf((char *) IObuff, IOSIZE, fmt, ap, NULL); + va_end(ap); + + return emsg(IObuff); +} + /* * Like msg(), but truncate to a single line if p_shm contains 't', or when * "force" is TRUE. This truncates in another way as for normal messages. @@ -3097,11 +3112,12 @@ int vim_snprintf(char *str, size_t str_m, char *fmt, ...) return str_l; } -int vim_vsnprintf(char *str, size_t str_m, char *fmt, va_list ap, typval_T *tvs) +int vim_vsnprintf(char *str, size_t str_m, const char *fmt, va_list ap, + typval_T *tvs) { size_t str_l = 0; bool str_avail = str_l < str_m; - char *p = fmt; + const char *p = fmt; int arg_idx = 1; if (!p) { @@ -3135,7 +3151,7 @@ int vim_vsnprintf(char *str, size_t str_m, char *fmt, va_list ap, typval_T *tvs) char tmp[TMP_LEN]; // string address in case of string argument - char *str_arg; + const char *str_arg; // natural field width of arg without padding and sign size_t str_arg_l; diff --git a/test/functional/eval/json_functions_spec.lua b/test/functional/eval/json_functions_spec.lua index 13597eb7a0..398fab6c4b 100644 --- a/test/functional/eval/json_functions_spec.lua +++ b/test/functional/eval/json_functions_spec.lua @@ -663,4 +663,11 @@ describe('jsonencode() function', function() eq(1, eval('"\x01" =~# "\\\\p"')) eq('"\\u0001"', funcs.jsonencode('\x01')) end) + + it('fails when using surrogate character in a UTF-8 string', function() + eq('Vim(call):E474: UTF-8 string contains code point which belongs to a surrogate pair: \xED\xA0\x80', + exc_exec('call jsonencode("\xED\xA0\x80")')) + eq('Vim(call):E474: UTF-8 string contains code point which belongs to a surrogate pair: \xED\xAF\xBF', + exc_exec('call jsonencode("\xED\xAF\xBF")')) + end) end) |