diff options
author | ZyX <kp-pav@yandex.ru> | 2016-02-11 01:29:09 +0300 |
---|---|---|
committer | ZyX <kp-pav@yandex.ru> | 2016-04-18 02:47:13 +0300 |
commit | f0bd4a149408e75ebf887530964e0948518938dc (patch) | |
tree | 06c489c6a8b506a122a0d93448c7bc9418dd8b5b /src/nvim/eval/encode.c | |
parent | 77776b09c684bc2a0c42114fce5a8b04409ec91d (diff) | |
download | rneovim-f0bd4a149408e75ebf887530964e0948518938dc.tar.gz rneovim-f0bd4a149408e75ebf887530964e0948518938dc.tar.bz2 rneovim-f0bd4a149408e75ebf887530964e0948518938dc.zip |
eval/encode: Fix handling of invalid UTF-8 strings:
1. Do not read past the end of the buffer when creating error messages.
2. Fix the surrogate pair ranges and replace magic constants with named ones.
Diffstat (limited to 'src/nvim/eval/encode.c')
-rw-r--r-- | src/nvim/eval/encode.c | 20 |
1 files changed, 12 insertions, 8 deletions
diff --git a/src/nvim/eval/encode.c b/src/nvim/eval/encode.c index 6026189235..6fa22bfc5c 100644 --- a/src/nvim/eval/encode.c +++ b/src/nvim/eval/encode.c @@ -895,7 +895,8 @@ static inline int convert_to_json_string(garray_T *const gap, if (p_enc_conv.vc_type != CONV_NONE) { tofree = string_convert(&p_enc_conv, buf, &utf_len); if (tofree == NULL) { - EMSG2(_("E474: Failed to convert string \"%s\" to UTF-8"), utf_buf); + emsgf(_("E474: Failed to convert string \"%.*s\" to UTF-8"), + utf_len, utf_buf); return FAIL; } utf_buf = tofree; @@ -930,18 +931,21 @@ static inline int convert_to_json_string(garray_T *const gap, } default: { if (ch > 0x7F && shift == 1) { - EMSG2(_("E474: String \"%s\" contains byte that does not start any " - "UTF-8 character"), utf_buf); + emsgf(_("E474: String \"%.*s\" contains byte that does not start " + "any UTF-8 character"), + utf_len - (i - shift), utf_buf + i - shift); return FAIL; - } else if ((0xD800 <= ch && ch <= 0xDB7F) - || (0xDC00 <= ch && ch <= 0xDFFF)) { - EMSG2(_("E474: UTF-8 string contains code point which belongs " - "to surrogate pairs: %s"), utf_buf + i); + } else if ((SURROGATE_HI_START <= ch && ch <= SURROGATE_HI_END) + || (SURROGATE_LO_START <= ch && ch <= SURROGATE_LO_END)) { + emsgf(_("E474: UTF-8 string contains code point which belongs " + "to a surrogate pair: %.*s"), + utf_len - (i - shift), utf_buf + i - shift); return FAIL; } else if (ENCODE_RAW(p_enc_conv, ch)) { str_len += shift; } else { - str_len += ((sizeof("\\u1234") - 1) * (size_t) (1 + (ch > 0xFFFF))); + str_len += ((sizeof("\\u1234") - 1) + * (size_t) (1 + (ch >= SURROGATE_FIRST_CHAR))); } break; } |