diff options
author | ZyX <kp-pav@yandex.ru> | 2016-02-06 23:07:53 +0300 |
---|---|---|
committer | ZyX <kp-pav@yandex.ru> | 2016-04-18 02:46:34 +0300 |
commit | 569e404622900222d88d856adbc6421734146bea (patch) | |
tree | 953b3bdf862bc3a268d431e2d389f7e069005a0d | |
parent | 7124329bd915e3896b7f09083ff394cd7f598cb8 (diff) | |
download | rneovim-569e404622900222d88d856adbc6421734146bea.tar.gz rneovim-569e404622900222d88d856adbc6421734146bea.tar.bz2 rneovim-569e404622900222d88d856adbc6421734146bea.zip |
eval/encode: Fix non-utf-8 &encoding handling, add tests
-rw-r--r-- | src/nvim/eval/encode.c | 45 | ||||
-rw-r--r-- | test/functional/eval/json_functions_spec.lua | 27 |
2 files changed, 52 insertions, 20 deletions
diff --git a/src/nvim/eval/encode.c b/src/nvim/eval/encode.c index 2df689990a..b29a4c6f21 100644 --- a/src/nvim/eval/encode.c +++ b/src/nvim/eval/encode.c @@ -882,11 +882,11 @@ static inline int convert_to_json_string(garray_T *const gap, const size_t len) FUNC_ATTR_NONNULL_ARG(1) FUNC_ATTR_ALWAYS_INLINE { - const char *buf_ = buf; - if (buf_ == NULL) { + const char *utf_buf = buf; + if (utf_buf == NULL) { ga_concat(gap, "\"\""); } else { - size_t len_ = len; + size_t utf_len = len; char *tofree = NULL; if (last_p_enc != (const void *) p_enc) { p_enc_conv.vc_type = CONV_NONE; @@ -895,17 +895,28 @@ static inline int convert_to_json_string(garray_T *const gap, last_p_enc = p_enc; } if (p_enc_conv.vc_type != CONV_NONE) { - tofree = string_convert(&p_enc_conv, buf_, &len_); + tofree = string_convert(&p_enc_conv, buf, &utf_len); if (tofree == NULL) { - EMSG2(_("E474: Failed to convert string \"%s\" to UTF-8"), buf_); + EMSG2(_("E474: Failed to convert string \"%s\" to UTF-8"), utf_buf); return FAIL; } - buf_ = tofree; + utf_buf = tofree; } size_t str_len = 0; - for (size_t i = 0; i < len_;) { - const int ch = utf_ptr2char(buf + i); - const size_t shift = (ch == 0? 1: utf_ptr2len(buf + i)); + // Encode character as \u0000 if + // 1. It is an ASCII control character (0x0 .. 0x1F, 0x7F). + // 2. &encoding is not UTF-8 and code point is above 0x7F. + // 3. &encoding is UTF-8 and code point is not printable according to + // utf_printable(). + // This is done to make it possible to :echo values when &encoding is not + // UTF-8. +#define ENCODE_RAW(p_enc_conv, ch) \ + (ch >= 0x20 && (p_enc_conv.vc_type == CONV_NONE \ + ? utf_printable(ch) \ + : ch < 0x7F)) + for (size_t i = 0; i < utf_len;) { + const int ch = utf_ptr2char(utf_buf + i); + const size_t shift = (ch == 0? 1: utf_ptr2len(utf_buf + i)); assert(shift > 0); i += shift; switch (ch) { @@ -922,14 +933,14 @@ static inline int convert_to_json_string(garray_T *const gap, default: { if (ch > 0x7F && shift == 1) { EMSG2(_("E474: String \"%s\" contains byte that does not start any " - "UTF-8 character"), buf_); + "UTF-8 character"), utf_buf); return FAIL; } else if ((0xD800 <= ch && ch <= 0xDB7F) || (0xDC00 <= ch && ch <= 0xDFFF)) { EMSG2(_("E474: UTF-8 string contains code point which belongs " - "to surrogate pairs"), buf_); + "to surrogate pairs: %s"), utf_buf + i); return FAIL; - } else if (vim_isprintc(ch)) { + } else if (ENCODE_RAW(p_enc_conv, ch)) { str_len += shift; } else { str_len += ((sizeof("\\u1234") - 1) * (size_t) (1 + (ch > 0xFFFF))); @@ -940,12 +951,12 @@ static inline int convert_to_json_string(garray_T *const gap, } ga_append(gap, '"'); ga_grow(gap, (int) str_len); - for (size_t i = 0; i < len_;) { - const int ch = utf_ptr2char(buf + i); + for (size_t i = 0; i < utf_len;) { + const int ch = utf_ptr2char(utf_buf + i); const size_t shift = (ch == 0? 1: utf_char2len(ch)); assert(shift > 0); // Is false on invalid unicode, but this should already be handled. - assert(ch == 0 || shift == utf_ptr2len(buf + i)); + assert(ch == 0 || shift == utf_ptr2len(utf_buf + i)); switch (ch) { case BS: case TAB: @@ -958,8 +969,8 @@ static inline int convert_to_json_string(garray_T *const gap, break; } default: { - if (vim_isprintc(ch)) { - ga_concat_len(gap, buf + i, shift); + if (ENCODE_RAW(p_enc_conv, ch)) { + ga_concat_len(gap, utf_buf + i, shift); } else if (ch < SURROGATE_FIRST_CHAR) { ga_concat_len(gap, ((const char[]) { '\\', 'u', diff --git a/test/functional/eval/json_functions_spec.lua b/test/functional/eval/json_functions_spec.lua index 6f81a36479..13597eb7a0 100644 --- a/test/functional/eval/json_functions_spec.lua +++ b/test/functional/eval/json_functions_spec.lua @@ -8,8 +8,8 @@ local execute = helpers.execute local exc_exec = helpers.exc_exec describe('jsondecode() function', function() - before_each(function() - clear() + local restart = function(cmd) + clear(cmd) execute([[ function Eq(exp, act) let act = a:act @@ -53,7 +53,8 @@ describe('jsondecode() function', function() endif endfunction ]]) - end) + end + before_each(restart) local speq = function(expected, actual_expr) eq(1, funcs.EvalEq(expected, actual_expr)) @@ -396,6 +397,7 @@ describe('jsondecode() function', function() it('parses strings with NUL properly', function() sp_decode_eq({_TYPE='string', _VAL={'\n'}}, '"\\u0000"') sp_decode_eq({_TYPE='string', _VAL={'\n', '\n'}}, '"\\u0000\\n\\u0000"') + sp_decode_eq({_TYPE='string', _VAL={'\n«\n'}}, '"\\u0000\\u00AB\\u0000"') end) it('parses dictionaries with duplicate keys to special maps', function() @@ -436,6 +438,12 @@ describe('jsondecode() function', function() sp_decode_eq({_TYPE='map', _VAL={{'b', 3}, {'a', 1}, {'c', 4}, {'d', 2}, {{_TYPE='string', _VAL={'\n'}}, 4}}}, '{"b": 3, "a": 1, "c": 4, "d": 2, "\\u0000": 4}') end) + + it('converts strings to latin1 when &encoding is latin1', function() + restart('set encoding=latin1') + eq('\xAB', funcs.jsondecode('"\\u00AB"')) + sp_decode_eq({_TYPE='string', _VAL={'\n\xAB\n'}}, '"\\u0000\\u00AB\\u0000"') + end) end) describe('jsonencode() function', function() @@ -447,6 +455,7 @@ describe('jsonencode() function', function() eq('"\\t"', funcs.jsonencode('\t')) eq('"\\n"', funcs.jsonencode('\n')) eq('"\\u001B"', funcs.jsonencode('\27')) + eq('"þÿþ"', funcs.jsonencode('þÿþ')) end) it('dumps numbers', function() @@ -642,4 +651,16 @@ describe('jsonencode() function', function() eq('Vim(call):E118: Too many arguments for function: jsonencode', exc_exec('call jsonencode(["", ""], 1)')) end) + + it('converts strings from latin1 when &encoding is latin1', function() + clear('set encoding=latin1') + eq('"\\u00AB"', funcs.jsonencode('\xAB')) + eq('"\\u0000\\u00AB\\u0000"', eval('jsonencode({"_TYPE": v:msgpack_types.string, "_VAL": ["\\n\xAB\\n"]})')) + end) + + it('ignores improper values in &isprint', function() + meths.set_option('isprint', '1') + eq(1, eval('"\x01" =~# "\\\\p"')) + eq('"\\u0001"', funcs.jsonencode('\x01')) + end) end) |