diff options
Diffstat (limited to 'src/nvim/eval/decode.c')
-rw-r--r-- | src/nvim/eval/decode.c | 35 |
1 files changed, 30 insertions, 5 deletions
diff --git a/src/nvim/eval/decode.c b/src/nvim/eval/decode.c index 23e7752ecc..29841db1b6 100644 --- a/src/nvim/eval/decode.c +++ b/src/nvim/eval/decode.c @@ -264,8 +264,8 @@ int json_decode_string(const char *const buf, const size_t len, } case '"': { size_t len = 0; - const char *s; - for (s = ++p; p < e && *p != '"'; p++) { + const char *const s = ++p; + while (p < e && *p != '"') { if (*p == '\\') { p++; if (p == e) { @@ -285,9 +285,10 @@ int json_decode_string(const char *const buf, const size_t len, p - 1); goto json_decode_string_fail; } - // One UTF-8 character below U+10000 can take up to 3 bytes + // One UTF-8 character below U+10000 can take up to 3 bytes, + // above up to 6, but they are encoded using two \u escapes. len += 3; - p += 4; + p += 5; break; } case '\\': @@ -299,6 +300,7 @@ int json_decode_string(const char *const buf, const size_t len, case 'r': case 'f': { len++; + p++; break; } default: { @@ -307,7 +309,30 @@ int json_decode_string(const char *const buf, const size_t len, } } } else { - len++; + uint8_t p_byte = (uint8_t) *p; + // unescaped = %x20-21 / %x23-5B / %x5D-10FFFF + if (p_byte < 0x20) { + EMSG2(_("E474: ASCII control characters cannot be present " + "inside string: %s"), p); + goto json_decode_string_fail; + } + const int ch = utf_ptr2char((char_u *) p); + // All characters above U+007F are encoded using two or more bytes + // and thus cannot possibly be equal to *p. But utf_ptr2char({0xFF, + // 0}) will return 0xFF, even though 0xFF cannot start any UTF-8 + // code point at all. + if (ch >= 0x80 && p_byte == ch) { + EMSG2(_("E474: Only UTF-8 strings allowed: %s"), p); + goto json_decode_string_fail; + } else if (ch > 0x10FFFF) { + EMSG2(_("E474: Only UTF-8 code points up to U+10FFFF " + "are allowed to appear unescaped: %s"), p); + goto json_decode_string_fail; + } + const size_t ch_len = (size_t) utf_char2len(ch); + assert(ch_len == (size_t) (ch ? utf_ptr2len((char_u *) p) : 1)); + len += ch_len; + p += ch_len; } } if (*p != '"') { |