aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--runtime/doc/eval.txt13
-rw-r--r--test/functional/eval/json_functions_spec.lua8
2 files changed, 21 insertions, 0 deletions
diff --git a/runtime/doc/eval.txt b/runtime/doc/eval.txt
index d171dacad1..b1485f1195 100644
--- a/runtime/doc/eval.txt
+++ b/runtime/doc/eval.txt
@@ -4327,6 +4327,13 @@ json_decode({expr}) *json_decode()*
dictionary and for string will be emitted in case string
with NUL byte was a dictionary key.
+ Note: this function always treats its input as UTF-8, regardless
+ of the 'encoding' value. This is needed because the JSON source
+ is supposed to be external (e.g. |readfile()|) and the JSON
+ standard allows only a few encodings, of which UTF-8 is
+ recommended and the only one required to be supported.
+ Non-UTF-8 characters are an error.
+
json_encode({expr}) *json_encode()*
Convert {expr} into a JSON string. Accepts
|msgpack-special-dict| as the input. Converts from 'encoding'
@@ -4341,6 +4348,12 @@ json_encode({expr}) *json_encode()*
Non-printable characters are converted into "\u1234" escapes
or special escapes like "\t", other are dumped as-is.
+ Note: all non-ASCII characters (above U+007F) are considered
+ non-printable when 'encoding' is not UTF-8. This function
+ always outputs UTF-8 strings, as required by the standard, so
+ when 'encoding' is not Unicode the resulting string will look
+ incorrect if "\u1234" notation is not used.
+
keys({dict}) *keys()*
Return a |List| with all the keys of {dict}. The |List| is in
arbitrary order.
diff --git a/test/functional/eval/json_functions_spec.lua b/test/functional/eval/json_functions_spec.lua
index 8483152dbf..bed9d668fa 100644
--- a/test/functional/eval/json_functions_spec.lua
+++ b/test/functional/eval/json_functions_spec.lua
@@ -521,6 +521,14 @@ describe('json_decode() function', function()
local str = ('%s{%s"key"%s:%s[%s"val"%s,%s"val2"%s]%s,%s"key2"%s:%s1%s}%s'):gsub('%%s', s)
eq({key={'val', 'val2'}, key2=1}, funcs.json_decode(str))
end)
+
+ it('always treats input as UTF-8', function()
+ -- When &encoding is latin1 the UTF-8 bytes of "«" (0xC2 0xAB) read as two
+ -- characters, U+00C2 U+00AB: "Â«". So if '"«"' was parsed as latin1
+ -- json_decode would return two characters, and only one (U+00AB) when this
+ -- string is parsed as UTF-8.
+ restart('set encoding=latin1')
+ eq(('%c'):format(0xAB), funcs.json_decode('"«"'))
+ end)
end)
describe('json_encode() function', function()