diff options
-rw-r--r-- | src/nvim/eval/decode.c | 35 | ||||
-rw-r--r-- | test/functional/eval/json_functions_spec.lua | 61 |
2 files changed, 91 insertions, 5 deletions
diff --git a/src/nvim/eval/decode.c b/src/nvim/eval/decode.c index 23e7752ecc..29841db1b6 100644 --- a/src/nvim/eval/decode.c +++ b/src/nvim/eval/decode.c @@ -264,8 +264,8 @@ int json_decode_string(const char *const buf, const size_t len, } case '"': { size_t len = 0; - const char *s; - for (s = ++p; p < e && *p != '"'; p++) { + const char *const s = ++p; + while (p < e && *p != '"') { if (*p == '\\') { p++; if (p == e) { @@ -285,9 +285,10 @@ int json_decode_string(const char *const buf, const size_t len, p - 1); goto json_decode_string_fail; } - // One UTF-8 character below U+10000 can take up to 3 bytes + // One UTF-8 character below U+10000 can take up to 3 bytes, + // above up to 6, but they are encoded using two \u escapes. len += 3; - p += 4; + p += 5; break; } case '\\': @@ -299,6 +300,7 @@ int json_decode_string(const char *const buf, const size_t len, case 'r': case 'f': { len++; + p++; break; } default: { @@ -307,7 +309,30 @@ int json_decode_string(const char *const buf, const size_t len, } } } else { - len++; + uint8_t p_byte = (uint8_t) *p; + // unescaped = %x20-21 / %x23-5B / %x5D-10FFFF + if (p_byte < 0x20) { + EMSG2(_("E474: ASCII control characters cannot be present " + "inside string: %s"), p); + goto json_decode_string_fail; + } + const int ch = utf_ptr2char((char_u *) p); + // All characters above U+007F are encoded using two or more bytes + // and thus cannot possibly be equal to *p. But utf_ptr2char({0xFF, + // 0}) will return 0xFF, even though 0xFF cannot start any UTF-8 + // code point at all. + if (ch >= 0x80 && p_byte == ch) { + EMSG2(_("E474: Only UTF-8 strings allowed: %s"), p); + goto json_decode_string_fail; + } else if (ch > 0x10FFFF) { + EMSG2(_("E474: Only UTF-8 code points up to U+10FFFF " + "are allowed to appear unescaped: %s"), p); + goto json_decode_string_fail; + } + const size_t ch_len = (size_t) utf_char2len(ch); + assert(ch_len == (size_t) (ch ? utf_ptr2len((char_u *) p) : 1)); + len += ch_len; + p += ch_len; } } if (*p != '"') { diff --git a/test/functional/eval/json_functions_spec.lua b/test/functional/eval/json_functions_spec.lua index f979a6dd7c..9167cb2fef 100644 --- a/test/functional/eval/json_functions_spec.lua +++ b/test/functional/eval/json_functions_spec.lua @@ -235,6 +235,67 @@ describe('jsondecode() function', function() eq('', funcs.jsondecode('""')) eq('\\/"\t\b\n\r\f', funcs.jsondecode([["\\\/\"\t\b\n\r\f"]])) eq('/a', funcs.jsondecode([["\/a"]])) + -- Unicode characters: 2-byte, 3-byte, 4-byte + eq({ + '«', + 'ફ', + '\xF0\x90\x80\x80', + }, funcs.jsondecode({ + '[', + '"«",', + '"ફ",', + '"\xF0\x90\x80\x80"', + ']', + })) + end) + + it('fails on strings with invalid bytes', function() + eq('Vim(call):E474: Only UTF-8 strings allowed: \255"', + exc_exec('call jsondecode("\\t\\"\\xFF\\"")')) + eq('Vim(call):E474: ASCII control characters cannot be present inside string: ', + exc_exec('call jsondecode(["\\"\\n\\""])')) + -- 0xC2 starts 2-byte unicode character + eq('Vim(call):E474: Only UTF-8 strings allowed: \194"', + exc_exec('call jsondecode("\\t\\"\\xC2\\"")')) + -- 0xE0 0xAA starts 3-byte unicode character + eq('Vim(call):E474: Only UTF-8 strings allowed: \224"', + exc_exec('call jsondecode("\\t\\"\\xE0\\"")')) + eq('Vim(call):E474: Only UTF-8 strings allowed: \224\170"', + exc_exec('call jsondecode("\\t\\"\\xE0\\xAA\\"")')) + -- 0xF0 0x90 0x80 starts 4-byte unicode character + eq('Vim(call):E474: Only UTF-8 strings allowed: \240"', + exc_exec('call jsondecode("\\t\\"\\xF0\\"")')) + eq('Vim(call):E474: Only UTF-8 strings allowed: \240\144"', + exc_exec('call jsondecode("\\t\\"\\xF0\\x90\\"")')) + eq('Vim(call):E474: Only UTF-8 strings allowed: \240\144\128"', + exc_exec('call jsondecode("\\t\\"\\xF0\\x90\\x80\\"")')) + -- 0xF9 0x80 0x80 0x80 starts 5-byte unicode character + eq('Vim(call):E474: Only UTF-8 strings allowed: \xF9"', + exc_exec('call jsondecode("\\t\\"\\xF9\\"")')) + eq('Vim(call):E474: Only UTF-8 strings allowed: \xF9\x80"', + exc_exec('call jsondecode("\\t\\"\\xF9\\x80\\"")')) + eq('Vim(call):E474: Only UTF-8 strings allowed: \xF9\x80\x80"', + exc_exec('call jsondecode("\\t\\"\\xF9\\x80\\x80\\"")')) + eq('Vim(call):E474: Only UTF-8 strings allowed: \xF9\x80\x80\x80"', + exc_exec('call jsondecode("\\t\\"\\xF9\\x80\\x80\\x80\\"")')) + -- 0xFC 0x90 0x80 0x80 0x80 starts 6-byte unicode character + eq('Vim(call):E474: Only UTF-8 strings allowed: \xFC"', + exc_exec('call jsondecode("\\t\\"\\xFC\\"")')) + eq('Vim(call):E474: Only UTF-8 strings allowed: \xFC\x90"', + exc_exec('call jsondecode("\\t\\"\\xFC\\x90\\"")')) + eq('Vim(call):E474: Only UTF-8 strings allowed: \xFC\x90\x80"', + exc_exec('call jsondecode("\\t\\"\\xFC\\x90\\x80\\"")')) + eq('Vim(call):E474: Only UTF-8 strings allowed: \xFC\x90\x80\x80"', + exc_exec('call jsondecode("\\t\\"\\xFC\\x90\\x80\\x80\\"")')) + eq('Vim(call):E474: Only UTF-8 strings allowed: \xFC\x90\x80\x80\x80"', + exc_exec('call jsondecode("\\t\\"\\xFC\\x90\\x80\\x80\\x80\\"")')) + -- Specification does not allow unquoted characters above 0x10FFFF + eq('Vim(call):E474: Only UTF-8 code points up to U+10FFFF are allowed to appear unescaped: \xF9\x80\x80\x80\x80"', + exc_exec('call jsondecode("\\t\\"\\xF9\\x80\\x80\\x80\\x80\\"")')) + eq('Vim(call):E474: Only UTF-8 code points up to U+10FFFF are allowed to appear unescaped: \xFC\x90\x80\x80\x80\x80"', + exc_exec('call jsondecode("\\t\\"\\xFC\\x90\\x80\\x80\\x80\\x80\\"")')) + -- '"\xF9\x80\x80\x80\x80"', + -- '"\xFC\x90\x80\x80\x80\x80"', end) end) |