diff options
Diffstat (limited to 'src/nvim/viml/parser')
-rw-r--r-- | src/nvim/viml/parser/expressions.c | 376 | ||||
-rw-r--r-- | src/nvim/viml/parser/expressions.h | 7 |
2 files changed, 375 insertions, 8 deletions
diff --git a/src/nvim/viml/parser/expressions.c b/src/nvim/viml/parser/expressions.c index 0613cc66d5..3f30fe2a0e 100644 --- a/src/nvim/viml/parser/expressions.c +++ b/src/nvim/viml/parser/expressions.c @@ -915,8 +915,6 @@ static inline void viml_pexpr_debug_print_token( // // NVimIdentifierKey -> Identifier // -// NVimFigureBrace -> NVimInternalError -// // NVimUnaryPlus -> NVimUnaryOperator // NVimBinaryPlus -> NVimBinaryOperator // NVimConcat -> NVimBinaryOperator @@ -929,6 +927,18 @@ static inline void viml_pexpr_debug_print_token( // NVimNestingParenthesis -> NVimParenthesis // NVimCallingParenthesis -> NVimParenthesis // +// NVimString -> String +// NVimStringSpecial -> SpecialChar +// NVimSingleQuote -> NVimString +// NVimSingleQuotedBody -> NVimString +// NVimSingleQuotedQuote -> NVimStringSpecial +// NVimDoubleQuote -> NVimString +// NVimDoubleQuotedBody -> NVimString +// NVimDoubleQuotedEscape -> NVimStringSpecial +// NVimDoubleQuotedUnknownEscape -> NVimInvalid +// +// " Note: NVimDoubleQuotedUnknownEscape is not actually invalid +// // NVimInvalidComma -> NVimInvalidDelimiter // NVimInvalidSpacing -> NVimInvalid // NVimInvalidTernary -> NVimInvalidOperator @@ -952,6 +962,19 @@ static inline void viml_pexpr_debug_print_token( // NVimInvalidList -> NVimInvalidDelimiter // NVimInvalidSubscript -> NVimInvalidDelimiter // NVimInvalidSubscriptColon -> NVimInvalidSubscript +// NVimInvalidString -> NVimInvalidValue +// NVimInvalidStringSpecial -> NVimInvalidString +// NVimInvalidSingleQuote -> NVimInvalidString +// NVimInvalidSingleQuotedBody -> NVimInvalidString +// NVimInvalidSingleQuotedQuote -> NVimInvalidStringSpecial +// NVimInvalidDoubleQuote -> NVimInvalidString +// NVimInvalidDoubleQuotedBody -> NVimInvalidString +// NVimInvalidDoubleQuotedEscape -> NVimInvalidStringSpecial +// NVimInvalidDoubleQuotedUnknownEscape -> NVimInvalid +// +// NVimFigureBrace -> NVimInternalError +// NVimInvalidSingleQuotedUnknownEscape -> NVimInternalError +// NVimSingleQuotedUnknownEscape -> NVimInternalError /// Allocate a new node and set some of the values /// @@ -1402,6 +1425,318 @@ static inline void east_set_error(const ParserState *const pstate, } \ } while (0) +/// Structure used to define “string shifts” necessary to map string +/// highlighting to actual strings. +typedef struct { + size_t start; ///< Where special character starts in original string. + size_t orig_len; ///< Length of orininal string (e.g. 4 for "\x80"). + size_t act_len; ///< Length of resulting character(s) (e.g. 1 for "\x80"). + bool escape_not_known; ///< True if escape sequence in original is not known. +} StringShift; + +/// Parse and highlight single- or double-quoted string +/// +/// Function is supposed to detect and highlight regular expressions (but does +/// not do now). +/// +/// @param[out] pstate Parser state which also contains a place where +/// highlighting is saved. +/// @param[out] node Node where string parsing results are saved. +/// @param[in] token Token to highlight. +/// @param[in] ast_stack Parser AST stack, used to detect whether current +/// string is a regex. +/// @param[in] is_invalid Whether currently processed token is not valid. +static void parse_quoted_string(ParserState *const pstate, + ExprASTNode *const node, + const LexExprToken token, + const ExprASTStack ast_stack, + const bool is_invalid) + FUNC_ATTR_NONNULL_ALL +{ + const ParserLine pline = pstate->reader.lines.items[token.start.line]; + const char *const s = pline.data + token.start.col; + const char *const e = s + token.len - token.data.str.closed; + const char *p = s + 1; + const bool is_double = (token.type == kExprLexDoubleQuotedString); + size_t size = token.len - token.data.str.closed - 1; + kvec_withinit_t(StringShift, 16) shifts; + kvi_init(shifts); + if (!is_double) { + viml_parser_highlight(pstate, token.start, 1, HL(SingleQuotedString)); + while (p < e) { + const char *const chunk_e = memchr(p, '\'', (size_t)(e - p)); + if (chunk_e == NULL) { + break; + } + size--; + p = chunk_e + 2; + if (pstate->colors) { + kvi_push(shifts, ((StringShift) { + .start = token.start.col + (size_t)(chunk_e - s), + .orig_len = 2, + .act_len = 1, + .escape_not_known = false, + })); + } + } + node->data.str.size = size; + if (size == 0) { + node->data.str.value = NULL; + } else { + char *v_p; + v_p = node->data.str.value = xmalloc(size); + p = s + 1; + while (p < e) { + const char *const chunk_e = memchr(p, '\'', (size_t)(e - p)); + if (chunk_e == NULL) { + memcpy(v_p, p, (size_t)(e - p)); + break; + } + memcpy(v_p, p, (size_t)(chunk_e - p)); + v_p += (size_t)(chunk_e - p) + 1; + v_p[-1] = '\''; + p = chunk_e + 2; + } + } + } else { + viml_parser_highlight(pstate, token.start, 1, HL(DoubleQuotedString)); + for (p = s + 1; p < e; p++) { + if (*p == '\\' && p + 1 < e) { + p++; + if (p + 1 == e) { + size--; + break; + } + switch (*p) { + // A "\<x>" form occupies at least 4 characters, and produces up to + // 6 characters: reserve space for 2 extra, but do not compute actual + // length just now, it would be costy. + case '<': { + size += 2; + break; + } + // Hexadecimal, always single byte, but at least three bytes each. + case 'x': case 'X': { + size--; + if (ascii_isxdigit(p[1])) { + size--; + if (p + 2 < e && ascii_isxdigit(p[2])) { + size--; + } + } + break; + } + // Unicode + // + // \uF takes 1 byte which is 2 bytes less then escape sequence. + // \uFF: 2 bytes, 2 bytes less. + // \uFFF: 3 bytes, 2 bytes less. + // \uFFFF: 3 bytes, 3 bytes less. + // \UFFFFF: 4 bytes, 3 bytes less. + // \UFFFFFF: 5 bytes, 3 bytes less. + // \UFFFFFFF: 6 bytes, 3 bytes less. + // \U7FFFFFFF: 6 bytes, 4 bytes less. + case 'u': case 'U': { + const char *const esc_start = p; + size_t n = (*p == 'u' ? 4 : 8); + int nr = 0; + p++; + while (n-- && ascii_isxdigit(p[1])) { + p++; + nr = (nr << 4) + hex2nr(*p); + } + // Escape length: (esc_start - 1) points to "\\", esc_start to "u" + // or "U", p to the byte after last byte. So escape sequence + // occupies p - (esc_start - 1), but it stands for a utf_char2len + // bytes. + size -= (size_t)((p - (esc_start - 1)) - utf_char2len(nr)); + p--; + break; + } + // Octal, always single byte, but at least two bytes each. + case '0': case '1': case '2': case '3': case '4': case '5': case '6': + case '7': { + size--; + p++; + if (*p >= '0' && *p <= '7') { + size--; + p++; + if (*p >= '0' && *p <= '7') { + size--; + p++; + } + } + break; + } + default: { + size--; + break; + } + } + } + } + if (size == 0) { + node->data.str.value = NULL; + node->data.str.size = 0; + } else { + char *v_p; + v_p = node->data.str.value = xmalloc(size); + p = s + 1; + while (p < e) { + const char *const chunk_e = memchr(p, '\\', (size_t)(e - p)); + if (chunk_e == NULL) { + memcpy(v_p, p, (size_t)(e - p)); + v_p += e - p; + break; + } + memcpy(v_p, p, (size_t)(chunk_e - p)); + v_p += (size_t)(chunk_e - p); + p = chunk_e + 1; + if (p == e) { + *v_p++ = '\\'; + break; + } + bool is_unknown = false; + const char *const v_p_start = v_p; + switch (*p) { +#define SINGLE_CHAR_ESC(ch, real_ch) \ + case ch: { \ + *v_p++ = real_ch; \ + p++; \ + break; \ + } + SINGLE_CHAR_ESC('b', BS) + SINGLE_CHAR_ESC('e', ESC) + SINGLE_CHAR_ESC('f', FF) + SINGLE_CHAR_ESC('n', NL) + SINGLE_CHAR_ESC('r', CAR) + SINGLE_CHAR_ESC('t', TAB) + SINGLE_CHAR_ESC('"', '"') + SINGLE_CHAR_ESC('\\', '\\') +#undef SINGLE_CHAR_ESC + + // Hexadecimal or unicode. + case 'X': case 'x': case 'u': case 'U': { + if (ascii_isxdigit(p[1])) { + size_t n; + int nr; + bool is_hex = (*p == 'x' || *p == 'X'); + + if (is_hex) { + n = 2; + } else if (*p == 'u') { + n = 4; + } else { + n = 8; + } + nr = 0; + while (n-- && ascii_isxdigit(p[1])) { + p++; + nr = (nr << 4) + hex2nr(*p); + } + p++; + if (is_hex) { + *v_p++ = (char)nr; + } else { + v_p += utf_char2bytes(nr, (char_u *)v_p); + } + } else { + is_unknown = true; + *v_p++ = *p; + p++; + } + break; + } + // Octal: "\1", "\12", "\123". + case '0': case '1': case '2': case '3': case '4': case '5': case '6': + case '7': { + uint8_t ch = (uint8_t)(*p++ - '0'); + if (*p >= '0' && *p <= '7') { + ch = (uint8_t)((ch << 3) + *p++ - '0'); + if (*p >= '0' && *p <= '7') { + ch = (uint8_t)((ch << 3) + *p++ - '0'); + } + } + *v_p++ = (char)ch; + break; + } + // Special key, e.g.: "\<C-W>" + case '<': { + const size_t special_len = ( + trans_special((const char_u **)&p, (size_t)(e - p), + (char_u *)v_p, true, true)); + if (special_len != 0) { + v_p += special_len; + } else { + is_unknown = true; + mb_copy_char((const char_u **)&p, (char_u **)&v_p); + } + break; + } + default: { + is_unknown = true; + mb_copy_char((const char_u **)&p, (char_u **)&v_p); + break; + } + } + if (pstate->colors) { + kvi_push(shifts, ((StringShift) { + .start = token.start.col + (size_t)(chunk_e - s), + .orig_len = (size_t)(p - chunk_e), + .act_len = (size_t)(v_p - (char *)v_p_start), + .escape_not_known = is_unknown, + })); + } + } + node->data.str.size = (size_t)(v_p - node->data.str.value); + } + } + if (pstate->colors) { + // TODO(ZyX-I): use ast_stack to determine and highlight regular expressions + // TODO(ZyX-I): use ast_stack to determine and highlight printf format str + // TODO(ZyX-I): use ast_stack to determine and highlight expression strings + size_t next_col = 1; + const char *const body_str = (is_double + ? HL(DoubleQuotedBody) + : HL(SingleQuotedBody)); + const char *const esc_str = (is_double + ? HL(DoubleQuotedEscape) + : HL(SingleQuotedQuote)); + const char *const ukn_esc_str = (is_double + ? HL(DoubleQuotedUnknownEscape) + : HL(SingleQuotedUnknownEscape)); + for (size_t i = 0; i < kv_size(shifts); i++) { + const StringShift cur_shift = kv_A(shifts, i); + if (cur_shift.start > next_col) { + viml_parser_highlight(pstate, shifted_pos(token.start, next_col), + cur_shift.start - next_col, + body_str); + } + viml_parser_highlight(pstate, shifted_pos(token.start, cur_shift.start), + cur_shift.orig_len, + (cur_shift.escape_not_known + ? ukn_esc_str + : esc_str)); + next_col = cur_shift.start + cur_shift.orig_len; + } + if (next_col < token.len - token.data.str.closed) { + viml_parser_highlight(pstate, shifted_pos(token.start, next_col), + token.len - token.data.str.closed - next_col, + body_str); + } + } + if (token.data.str.closed) { + if (is_double) { + viml_parser_highlight(pstate, shifted_pos(token.start, token.len - 1), + 1, HL(DoubleQuotedString)); + } else { + viml_parser_highlight(pstate, shifted_pos(token.start, token.len - 1), + 1, HL(SingleQuotedString)); + } + } + kvi_destroy(shifts); +} + /// Parse one VimL expression /// /// @param pstate Parser state. @@ -1714,12 +2049,7 @@ viml_pexpr_parse_invalid_comma: } else if (eastnode_lvl >= kEOpLvlComma) { can_be_ternary = false; } else { -viml_pexpr_parse_invalid_colon: - ERROR_FROM_TOKEN_AND_MSG( - cur_token, - _("E15: Colon outside of dictionary or ternary operator: " - "%.*s")); - break; + goto viml_pexpr_parse_invalid_colon; } if (i == kv_size(ast_stack) - 1) { goto viml_pexpr_parse_invalid_colon; @@ -1741,6 +2071,12 @@ viml_pexpr_parse_invalid_colon: ADD_OP_NODE(cur_node); HL_CUR_TOKEN(SubscriptColon); } else { + goto viml_pexpr_parse_valid_colon; +viml_pexpr_parse_invalid_colon: + ERROR_FROM_TOKEN_AND_MSG( + cur_token, + _("E15: Colon outside of dictionary or ternary operator: %.*s")); +viml_pexpr_parse_valid_colon: ADD_VALUE_IF_MISSING(_(EXP_VAL_COLON)); NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeColon); if (is_ternary) { @@ -2201,6 +2537,30 @@ viml_pexpr_parse_no_paren_closing_error: {} kvi_push(ast_stack, &ter_val_node->children); break; } + case kExprLexDoubleQuotedString: + case kExprLexSingleQuotedString: { + const bool is_double = (tok_type == kExprLexDoubleQuotedString); + if (!cur_token.data.str.closed) { + // It is weird, but Vim has two identical errors messages with + // different error numbers: "E114: Missing quote" and + // "E115: Missing quote". + ERROR_FROM_TOKEN_AND_MSG( + cur_token, (is_double + ? _("E114: Missing double quote: %.*s") + : _("E115: Missing single quote: %.*s"))); + } + if (want_node == kENodeOperator) { + OP_MISSING; + } + NEW_NODE_WITH_CUR_POS( + cur_node, (is_double + ? kExprNodeDoubleQuotedString + : kExprNodeSingleQuotedString)); + *top_node_p = cur_node; + parse_quoted_string(pstate, cur_node, cur_token, ast_stack, is_invalid); + want_node = kENodeOperator; + break; + } } viml_pexpr_parse_cycle_end: prev_token = cur_token; diff --git a/src/nvim/viml/parser/expressions.h b/src/nvim/viml/parser/expressions.h index 0d496c87ba..a09cdde4c0 100644 --- a/src/nvim/viml/parser/expressions.h +++ b/src/nvim/viml/parser/expressions.h @@ -195,6 +195,8 @@ typedef enum { kExprNodeConcatOrSubscript = 'S', kExprNodeInteger = '0', ///< Integral number. kExprNodeFloat = '1', ///< Floating-point number. + kExprNodeSingleQuotedString = '\'', + kExprNodeDoubleQuotedString = '"', } ExprASTNodeType; typedef struct expr_ast_node ExprASTNode; @@ -249,6 +251,11 @@ struct expr_ast_node { struct { float_T value; } flt; ///< For kExprNodeFloat. + struct { + char *value; + size_t size; + } str; ///< For kExprNodeSingleQuotedString and + ///< kExprNodeDoubleQuotedString. } data; }; |