about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- src/nvim/eval/decode.c 35
-rw-r--r-- test/functional/eval/json_functions_spec.lua 61
2 files changed, 91 insertions, 5 deletions
diff --git a/src/nvim/eval/decode.c b/src/nvim/eval/decode.c
index 23e7752ecc..29841db1b6 100644
--- a/src/nvim/eval/decode.c
+++ b/src/nvim/eval/decode.c
@@ -264,8 +264,8 @@ int json_decode_string(const char *const buf, const size_t len,
}
case '"': {
size_t len = 0;
- const char *s;
- for (s = ++p; p < e && *p != '"'; p++) {
+ const char *const s = ++p;
+ while (p < e && *p != '"') {
if (*p == '\\') {
p++;
if (p == e) {
@@ -285,9 +285,10 @@ int json_decode_string(const char *const buf, const size_t len,
p - 1);
goto json_decode_string_fail;
}
- // One UTF-8 character below U+10000 can take up to 3 bytes
+ // One UTF-8 character below U+10000 can take up to 3 bytes. Characters
+ // above that are written as two \u escapes: 3 bytes are reserved for each.
len += 3;
- p += 4;
+ p += 5;
break;
}
case '\\':
@@ -299,6 +300,7 @@ int json_decode_string(const char *const buf, const size_t len,
case 'r':
case 'f': {
len++;
+ p++;
break;
}
default: {
@@ -307,7 +309,30 @@ int json_decode_string(const char *const buf, const size_t len,
}
}
} else {
- len++;
+ uint8_t p_byte = (uint8_t) *p;
+ // unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
+ if (p_byte < 0x20) {
+ EMSG2(_("E474: ASCII control characters cannot be present "
+ "inside string: %s"), p);
+ goto json_decode_string_fail;
+ }
+ const int ch = utf_ptr2char((char_u *) p);
+ // All characters above U+007F are encoded using two or more bytes
+ // and thus cannot possibly be equal to *p. But utf_ptr2char({0xFF,
+ // 0}) returns 0xFF even though 0xFF cannot start any UTF-8 sequence,
+ // so ch == *p for ch >= 0x80 indicates an invalid byte sequence.
+ if (ch >= 0x80 && p_byte == ch) {
+ EMSG2(_("E474: Only UTF-8 strings allowed: %s"), p);
+ goto json_decode_string_fail;
+ } else if (ch > 0x10FFFF) {
+ EMSG2(_("E474: Only UTF-8 code points up to U+10FFFF "
+ "are allowed to appear unescaped: %s"), p);
+ goto json_decode_string_fail;
+ }
+ const size_t ch_len = (size_t) utf_char2len(ch);
+ assert(ch_len == (size_t) (ch ? utf_ptr2len((char_u *) p) : 1));
+ len += ch_len;
+ p += ch_len;
}
}
if (*p != '"') {
diff --git a/test/functional/eval/json_functions_spec.lua b/test/functional/eval/json_functions_spec.lua
index f979a6dd7c..9167cb2fef 100644
--- a/test/functional/eval/json_functions_spec.lua
+++ b/test/functional/eval/json_functions_spec.lua
@@ -235,6 +235,67 @@ describe('jsondecode() function', function()
eq('', funcs.jsondecode('""'))
eq('\\/"\t\b\n\r\f', funcs.jsondecode([["\\\/\"\t\b\n\r\f"]]))
eq('/a', funcs.jsondecode([["\/a"]]))
+ -- Unicode characters: 2-byte, 3-byte, 4-byte
+ eq({
+ '«',
+ 'ફ',
+ '\xF0\x90\x80\x80',
+ }, funcs.jsondecode({
+ '[',
+ '"«",',
+ '"ફ",',
+ '"\xF0\x90\x80\x80"',
+ ']',
+ }))
+ end)
+
+ it('fails on strings with invalid bytes', function()
+ eq('Vim(call):E474: Only UTF-8 strings allowed: \255"',
+ exc_exec('call jsondecode("\\t\\"\\xFF\\"")'))
+ eq('Vim(call):E474: ASCII control characters cannot be present inside string: ',
+ exc_exec('call jsondecode(["\\"\\n\\""])'))
+ -- 0xC2 starts 2-byte unicode character
+ eq('Vim(call):E474: Only UTF-8 strings allowed: \194"',
+ exc_exec('call jsondecode("\\t\\"\\xC2\\"")'))
+ -- 0xE0 0xAA starts 3-byte unicode character
+ eq('Vim(call):E474: Only UTF-8 strings allowed: \224"',
+ exc_exec('call jsondecode("\\t\\"\\xE0\\"")'))
+ eq('Vim(call):E474: Only UTF-8 strings allowed: \224\170"',
+ exc_exec('call jsondecode("\\t\\"\\xE0\\xAA\\"")'))
+ -- 0xF0 0x90 0x80 starts 4-byte unicode character
+ eq('Vim(call):E474: Only UTF-8 strings allowed: \240"',
+ exc_exec('call jsondecode("\\t\\"\\xF0\\"")'))
+ eq('Vim(call):E474: Only UTF-8 strings allowed: \240\144"',
+ exc_exec('call jsondecode("\\t\\"\\xF0\\x90\\"")'))
+ eq('Vim(call):E474: Only UTF-8 strings allowed: \240\144\128"',
+ exc_exec('call jsondecode("\\t\\"\\xF0\\x90\\x80\\"")'))
+ -- 0xF9 0x80 0x80 0x80 starts 5-byte unicode character
+ eq('Vim(call):E474: Only UTF-8 strings allowed: \xF9"',
+ exc_exec('call jsondecode("\\t\\"\\xF9\\"")'))
+ eq('Vim(call):E474: Only UTF-8 strings allowed: \xF9\x80"',
+ exc_exec('call jsondecode("\\t\\"\\xF9\\x80\\"")'))
+ eq('Vim(call):E474: Only UTF-8 strings allowed: \xF9\x80\x80"',
+ exc_exec('call jsondecode("\\t\\"\\xF9\\x80\\x80\\"")'))
+ eq('Vim(call):E474: Only UTF-8 strings allowed: \xF9\x80\x80\x80"',
+ exc_exec('call jsondecode("\\t\\"\\xF9\\x80\\x80\\x80\\"")'))
+ -- 0xFC 0x90 0x80 0x80 0x80 starts 6-byte unicode character
+ eq('Vim(call):E474: Only UTF-8 strings allowed: \xFC"',
+ exc_exec('call jsondecode("\\t\\"\\xFC\\"")'))
+ eq('Vim(call):E474: Only UTF-8 strings allowed: \xFC\x90"',
+ exc_exec('call jsondecode("\\t\\"\\xFC\\x90\\"")'))
+ eq('Vim(call):E474: Only UTF-8 strings allowed: \xFC\x90\x80"',
+ exc_exec('call jsondecode("\\t\\"\\xFC\\x90\\x80\\"")'))
+ eq('Vim(call):E474: Only UTF-8 strings allowed: \xFC\x90\x80\x80"',
+ exc_exec('call jsondecode("\\t\\"\\xFC\\x90\\x80\\x80\\"")'))
+ eq('Vim(call):E474: Only UTF-8 strings allowed: \xFC\x90\x80\x80\x80"',
+ exc_exec('call jsondecode("\\t\\"\\xFC\\x90\\x80\\x80\\x80\\"")'))
+ -- Specification does not allow unescaped code points above U+10FFFF
+ eq('Vim(call):E474: Only UTF-8 code points up to U+10FFFF are allowed to appear unescaped: \xF9\x80\x80\x80\x80"',
+ exc_exec('call jsondecode("\\t\\"\\xF9\\x80\\x80\\x80\\x80\\"")'))
+ eq('Vim(call):E474: Only UTF-8 code points up to U+10FFFF are allowed to appear unescaped: \xFC\x90\x80\x80\x80\x80"',
+ exc_exec('call jsondecode("\\t\\"\\xFC\\x90\\x80\\x80\\x80\\x80\\"")'))
+ -- '"\xF9\x80\x80\x80\x80"',
+ -- '"\xFC\x90\x80\x80\x80\x80"',
end)
end)