diff options
author | Justin M. Keyes <justinkz@gmail.com> | 2017-12-09 18:47:34 +0100 |
---|---|---|
committer | Justin M. Keyes <justinkz@gmail.com> | 2017-12-09 18:47:34 +0100 |
commit | 3cc7ebf8107bddb42a566ede4c2a51818ff623c5 (patch) | |
tree | e2f7a79ff73ee7a8e2a292f77fdb0df67485e555 /src | |
parent | afae4b514183c490737a28f2946def717e78a11c (diff) | |
parent | fbdc3ac4efbc97e2965b6083d429beabe261461c (diff) | |
download | rneovim-3cc7ebf8107bddb42a566ede4c2a51818ff623c5.tar.gz rneovim-3cc7ebf8107bddb42a566ede4c2a51818ff623c5.tar.bz2 rneovim-3cc7ebf8107bddb42a566ede4c2a51818ff623c5.zip |
Merge #7234 'built-in expression parser'
Diffstat (limited to 'src')
-rw-r--r-- | src/nvim/CMakeLists.txt | 2 | ||||
-rw-r--r-- | src/nvim/api/vim.c | 454 | ||||
-rw-r--r-- | src/nvim/ascii.h | 13 | ||||
-rw-r--r-- | src/nvim/charset.c | 207 | ||||
-rw-r--r-- | src/nvim/charset.h | 16 | ||||
-rw-r--r-- | src/nvim/edit.c | 27 | ||||
-rw-r--r-- | src/nvim/ex_docmd.c | 5 | ||||
-rw-r--r-- | src/nvim/ex_getln.c | 67 | ||||
-rwxr-xr-x | src/nvim/generators/gen_declarations.lua | 43 | ||||
-rw-r--r-- | src/nvim/globals.h | 23 | ||||
-rw-r--r-- | src/nvim/keymap.c | 174 | ||||
-rw-r--r-- | src/nvim/lib/kvec.h | 11 | ||||
-rw-r--r-- | src/nvim/lib/ringbuf.h | 27 | ||||
-rw-r--r-- | src/nvim/mbyte.c | 260 | ||||
-rw-r--r-- | src/nvim/mbyte.h | 8 | ||||
-rw-r--r-- | src/nvim/syntax.c | 676 | ||||
-rw-r--r-- | src/nvim/syntax.h | 3 | ||||
-rw-r--r-- | src/nvim/version.c | 28 | ||||
-rw-r--r-- | src/nvim/vim.h | 7 | ||||
-rw-r--r-- | src/nvim/viml/parser/expressions.c | 3102 | ||||
-rw-r--r-- | src/nvim/viml/parser/expressions.h | 389 | ||||
-rw-r--r-- | src/nvim/viml/parser/parser.c | 13 | ||||
-rw-r--r-- | src/nvim/viml/parser/parser.h | 244 |
23 files changed, 5191 insertions, 608 deletions
diff --git a/src/nvim/CMakeLists.txt b/src/nvim/CMakeLists.txt index a166ee6c02..d3e07326bf 100644 --- a/src/nvim/CMakeLists.txt +++ b/src/nvim/CMakeLists.txt @@ -86,6 +86,8 @@ foreach(subdir event eval lua + viml + viml/parser ) if(${subdir} MATCHES "tui" AND NOT FEAT_TUI) continue() diff --git a/src/nvim/api/vim.c b/src/nvim/api/vim.c index f4ccf07bec..416e7d22d2 100644 --- a/src/nvim/api/vim.c +++ b/src/nvim/api/vim.c @@ -33,6 +33,8 @@ #include "nvim/syntax.h" #include "nvim/getchar.h" #include "nvim/os/input.h" +#include "nvim/viml/parser/expressions.h" +#include "nvim/viml/parser/parser.h" #define LINE_BUFFER_SIZE 4096 @@ -889,6 +891,458 @@ theend: return rv; } +typedef struct { + ExprASTNode **node_p; + Object *ret_node_p; +} ExprASTConvStackItem; + +typedef kvec_withinit_t(ExprASTConvStackItem, 16) ExprASTConvStack; + +/// Parse a VimL expression +/// +/// @param[in] expr Expression to parse. Is always treated as a single line. +/// @param[in] flags Flags: +/// +/// - "m" if multiple expressions in a row are allowed (only +/// the first one will be parsed), +/// - "E" if EOC tokens are not allowed (determines whether +/// they will stop parsing process or be recognized as an +/// operator/space, though also yielding an error). +/// - "l" when needing to start parsing with lvalues for +/// ":let" or ":for". +/// +/// Common flag sets: +/// - "m" to parse like for ":echo". +/// - "E" to parse like for "<C-r>=". +/// - empty string for ":call". +/// - "lm" to parse for ":let". +/// @param[in] highlight If true, return value will also include "highlight" +/// key containing array of 4-tuples (arrays) (Integer, +/// Integer, Integer, String), where first three numbers +/// define the highlighted region and represent line, +/// starting column and ending column (latter exclusive: +/// one should highlight region [start_col, end_col)). +/// +/// @return AST: top-level dictionary holds keys +/// +/// "error": Dictionary with error, present only if parser saw some +/// error. Contains the following keys: +/// +/// "message": String, error message in printf format, translated. +/// Must contain exactly one "%.*s". +/// "arg": String, error message argument. +/// +/// "len": Amount of bytes successfully parsed. With flags equal to "" +/// that should be equal to the length of expr string. +/// +/// @note: “Sucessfully parsed” here means “participated in AST +/// creation”, not “till the first error”. +/// +/// "ast": AST, either nil or a dictionary with these keys: +/// +/// "type": node type, one of the value names from ExprASTNodeType +/// stringified without "kExprNode" prefix. +/// "start": a pair [line, column] describing where node is “started” +/// where "line" is always 0 (will not be 0 if you will be +/// using nvim_parse_viml() on e.g. ":let", but that is not +/// present yet). Both elements are Integers. +/// "len": “length” of the node. This and "start" are there for +/// debugging purposes primary (debugging parser and providing +/// debug information). +/// "children": a list of nodes described in top/"ast". There always +/// is zero, one or two children, key will not be present +/// if node has no children. Maximum number of children +/// may be found in node_maxchildren array. +/// +/// Local values (present only for certain nodes): +/// +/// "scope": a single Integer, specifies scope for "Option" and +/// "PlainIdentifier" nodes. For "Option" it is one of +/// ExprOptScope values, for "PlainIdentifier" it is one of +/// ExprVarScope values. +/// "ident": identifier (without scope, if any), present for "Option", +/// "PlainIdentifier", "PlainKey" and "Environment" nodes. +/// "name": Integer, register name (one character) or -1. Only present +/// for "Register" nodes. +/// "cmp_type": String, comparison type, one of the value names from +/// ExprComparisonType, stringified without "kExprCmp" +/// prefix. Only present for "Comparison" nodes. +/// "ccs_strategy": String, case comparison strategy, one of the +/// value names from ExprCaseCompareStrategy, +/// stringified without "kCCStrategy" prefix. Only +/// present for "Comparison" nodes. +/// "augmentation": String, augmentation type for "Assignment" nodes. +/// Is either an empty string, "Add", "Subtract" or +/// "Concat" for "=", "+=", "-=" or ".=" respectively. +/// "invert": Boolean, true if result of comparison needs to be +/// inverted. Only present for "Comparison" nodes. +/// "ivalue": Integer, integer value for "Integer" nodes. +/// "fvalue": Float, floating-point value for "Float" nodes. +/// "svalue": String, value for "SingleQuotedString" and +/// "DoubleQuotedString" nodes. +Dictionary nvim_parse_expression(String expr, String flags, Boolean highlight, + Error *err) + FUNC_API_SINCE(4) FUNC_API_ASYNC +{ + int pflags = 0; + for (size_t i = 0 ; i < flags.size ; i++) { + switch (flags.data[i]) { + case 'm': { pflags |= kExprFlagsMulti; break; } + case 'E': { pflags |= kExprFlagsDisallowEOC; break; } + case 'l': { pflags |= kExprFlagsParseLet; break; } + case NUL: { + api_set_error(err, kErrorTypeValidation, "Invalid flag: '\\0' (%u)", + (unsigned)flags.data[i]); + return (Dictionary)ARRAY_DICT_INIT; + } + default: { + api_set_error(err, kErrorTypeValidation, "Invalid flag: '%c' (%u)", + flags.data[i], (unsigned)flags.data[i]); + return (Dictionary)ARRAY_DICT_INIT; + } + } + } + ParserLine plines[] = { + { + .data = expr.data, + .size = expr.size, + .allocated = false, + }, + { NULL, 0, false }, + }; + ParserLine *plines_p = plines; + ParserHighlight colors; + kvi_init(colors); + ParserHighlight *const colors_p = (highlight ? &colors : NULL); + ParserState pstate; + viml_parser_init( + &pstate, parser_simple_get_line, &plines_p, colors_p); + ExprAST east = viml_pexpr_parse(&pstate, pflags); + + const size_t ret_size = ( + 2 // "ast", "len" + + (size_t)(east.err.msg != NULL) // "error" + + (size_t)highlight // "highlight" + + 0); + Dictionary ret = { + .items = xmalloc(ret_size * sizeof(ret.items[0])), + .size = 0, + .capacity = ret_size, + }; + ret.items[ret.size++] = (KeyValuePair) { + .key = STATIC_CSTR_TO_STRING("ast"), + .value = NIL, + }; + ret.items[ret.size++] = (KeyValuePair) { + .key = STATIC_CSTR_TO_STRING("len"), + .value = INTEGER_OBJ((Integer)(pstate.pos.line == 1 + ? plines[0].size + : pstate.pos.col)), + }; + if (east.err.msg != NULL) { + Dictionary err_dict = { + .items = xmalloc(2 * sizeof(err_dict.items[0])), + .size = 2, + .capacity = 2, + }; + err_dict.items[0] = (KeyValuePair) { + .key = STATIC_CSTR_TO_STRING("message"), + .value = STRING_OBJ(cstr_to_string(east.err.msg)), + }; + if (east.err.arg == NULL) { + err_dict.items[1] = (KeyValuePair) { + .key = STATIC_CSTR_TO_STRING("arg"), + .value = STRING_OBJ(STRING_INIT), + }; + } else { + err_dict.items[1] = (KeyValuePair) { + .key = STATIC_CSTR_TO_STRING("arg"), + .value = STRING_OBJ(((String) { + .data = xmemdupz(east.err.arg, (size_t)east.err.arg_len), + .size = (size_t)east.err.arg_len, + })), + }; + } + ret.items[ret.size++] = (KeyValuePair) { + .key = STATIC_CSTR_TO_STRING("error"), + .value = DICTIONARY_OBJ(err_dict), + }; + } + if (highlight) { + Array hl = (Array) { + .items = xmalloc(kv_size(colors) * sizeof(hl.items[0])), + .capacity = kv_size(colors), + .size = kv_size(colors), + }; + for (size_t i = 0 ; i < kv_size(colors) ; i++) { + const ParserHighlightChunk chunk = kv_A(colors, i); + Array chunk_arr = (Array) { + .items = xmalloc(4 * sizeof(chunk_arr.items[0])), + .capacity = 4, + .size = 4, + }; + chunk_arr.items[0] = INTEGER_OBJ((Integer)chunk.start.line); + chunk_arr.items[1] = INTEGER_OBJ((Integer)chunk.start.col); + chunk_arr.items[2] = INTEGER_OBJ((Integer)chunk.end_col); + chunk_arr.items[3] = STRING_OBJ(cstr_to_string(chunk.group)); + hl.items[i] = ARRAY_OBJ(chunk_arr); + } + ret.items[ret.size++] = (KeyValuePair) { + .key = STATIC_CSTR_TO_STRING("highlight"), + .value = ARRAY_OBJ(hl), + }; + } + kvi_destroy(colors); + + // Walk over the AST, freeing nodes in process. + ExprASTConvStack ast_conv_stack; + kvi_init(ast_conv_stack); + kvi_push(ast_conv_stack, ((ExprASTConvStackItem) { + .node_p = &east.root, + .ret_node_p = &ret.items[0].value, + })); + while (kv_size(ast_conv_stack)) { + ExprASTConvStackItem cur_item = kv_last(ast_conv_stack); + ExprASTNode *const node = *cur_item.node_p; + if (node == NULL) { + assert(kv_size(ast_conv_stack) == 1); + kv_drop(ast_conv_stack, 1); + } else { + if (cur_item.ret_node_p->type == kObjectTypeNil) { + const size_t ret_node_items_size = (size_t)( + 3 // "type", "start" and "len" + + (node->children != NULL) // "children" + + (node->type == kExprNodeOption + || node->type == kExprNodePlainIdentifier) // "scope" + + (node->type == kExprNodeOption + || node->type == kExprNodePlainIdentifier + || node->type == kExprNodePlainKey + || node->type == kExprNodeEnvironment) // "ident" + + (node->type == kExprNodeRegister) // "name" + + (3 // "cmp_type", "ccs_strategy", "invert" + * (node->type == kExprNodeComparison)) + + (node->type == kExprNodeInteger) // "ivalue" + + (node->type == kExprNodeFloat) // "fvalue" + + (node->type == kExprNodeDoubleQuotedString + || node->type == kExprNodeSingleQuotedString) // "svalue" + + (node->type == kExprNodeAssignment) // "augmentation" + + 0); + Dictionary ret_node = { + .items = xmalloc(ret_node_items_size * sizeof(ret_node.items[0])), + .capacity = ret_node_items_size, + .size = 0, + }; + *cur_item.ret_node_p = DICTIONARY_OBJ(ret_node); + } + Dictionary *ret_node = &cur_item.ret_node_p->data.dictionary; + if (node->children != NULL) { + const size_t num_children = 1 + (node->children->next != NULL); + Array children_array = { + .items = xmalloc(num_children * sizeof(children_array.items[0])), + .capacity = num_children, + .size = num_children, + }; + for (size_t i = 0; i < num_children; i++) { + children_array.items[i] = NIL; + } + ret_node->items[ret_node->size++] = (KeyValuePair) { + .key = STATIC_CSTR_TO_STRING("children"), + .value = ARRAY_OBJ(children_array), + }; + kvi_push(ast_conv_stack, ((ExprASTConvStackItem) { + .node_p = &node->children, + .ret_node_p = &children_array.items[0], + })); + } else if (node->next != NULL) { + kvi_push(ast_conv_stack, ((ExprASTConvStackItem) { + .node_p = &node->next, + .ret_node_p = cur_item.ret_node_p + 1, + })); + } else if (node != NULL) { + kv_drop(ast_conv_stack, 1); + ret_node->items[ret_node->size++] = (KeyValuePair) { + .key = STATIC_CSTR_TO_STRING("type"), + .value = STRING_OBJ(cstr_to_string(east_node_type_tab[node->type])), + }; + Array start_array = { + .items = xmalloc(2 * sizeof(start_array.items[0])), + .capacity = 2, + .size = 2, + }; + start_array.items[0] = INTEGER_OBJ((Integer)node->start.line); + start_array.items[1] = INTEGER_OBJ((Integer)node->start.col); + ret_node->items[ret_node->size++] = (KeyValuePair) { + .key = STATIC_CSTR_TO_STRING("start"), + .value = ARRAY_OBJ(start_array), + }; + ret_node->items[ret_node->size++] = (KeyValuePair) { + .key = STATIC_CSTR_TO_STRING("len"), + .value = INTEGER_OBJ((Integer)node->len), + }; + switch (node->type) { + case kExprNodeDoubleQuotedString: + case kExprNodeSingleQuotedString: { + ret_node->items[ret_node->size++] = (KeyValuePair) { + .key = STATIC_CSTR_TO_STRING("svalue"), + .value = STRING_OBJ(((String) { + .data = node->data.str.value, + .size = node->data.str.size, + })), + }; + break; + } + case kExprNodeOption: { + ret_node->items[ret_node->size++] = (KeyValuePair) { + .key = STATIC_CSTR_TO_STRING("scope"), + .value = INTEGER_OBJ(node->data.opt.scope), + }; + ret_node->items[ret_node->size++] = (KeyValuePair) { + .key = STATIC_CSTR_TO_STRING("ident"), + .value = STRING_OBJ(((String) { + .data = xmemdupz(node->data.opt.ident, + node->data.opt.ident_len), + .size = node->data.opt.ident_len, + })), + }; + break; + } + case kExprNodePlainIdentifier: { + ret_node->items[ret_node->size++] = (KeyValuePair) { + .key = STATIC_CSTR_TO_STRING("scope"), + .value = INTEGER_OBJ(node->data.var.scope), + }; + ret_node->items[ret_node->size++] = (KeyValuePair) { + .key = STATIC_CSTR_TO_STRING("ident"), + .value = STRING_OBJ(((String) { + .data = xmemdupz(node->data.var.ident, + node->data.var.ident_len), + .size = node->data.var.ident_len, + })), + }; + break; + } + case kExprNodePlainKey: { + ret_node->items[ret_node->size++] = (KeyValuePair) { + .key = STATIC_CSTR_TO_STRING("ident"), + .value = STRING_OBJ(((String) { + .data = xmemdupz(node->data.var.ident, + node->data.var.ident_len), + .size = node->data.var.ident_len, + })), + }; + break; + } + case kExprNodeEnvironment: { + ret_node->items[ret_node->size++] = (KeyValuePair) { + .key = STATIC_CSTR_TO_STRING("ident"), + .value = STRING_OBJ(((String) { + .data = xmemdupz(node->data.env.ident, + node->data.env.ident_len), + .size = node->data.env.ident_len, + })), + }; + break; + } + case kExprNodeRegister: { + ret_node->items[ret_node->size++] = (KeyValuePair) { + .key = STATIC_CSTR_TO_STRING("name"), + .value = INTEGER_OBJ(node->data.reg.name), + }; + break; + } + case kExprNodeComparison: { + ret_node->items[ret_node->size++] = (KeyValuePair) { + .key = STATIC_CSTR_TO_STRING("cmp_type"), + .value = STRING_OBJ(cstr_to_string( + eltkn_cmp_type_tab[node->data.cmp.type])), + }; + ret_node->items[ret_node->size++] = (KeyValuePair) { + .key = STATIC_CSTR_TO_STRING("ccs_strategy"), + .value = STRING_OBJ(cstr_to_string( + ccs_tab[node->data.cmp.ccs])), + }; + ret_node->items[ret_node->size++] = (KeyValuePair) { + .key = STATIC_CSTR_TO_STRING("invert"), + .value = BOOLEAN_OBJ(node->data.cmp.inv), + }; + break; + } + case kExprNodeFloat: { + ret_node->items[ret_node->size++] = (KeyValuePair) { + .key = STATIC_CSTR_TO_STRING("fvalue"), + .value = FLOAT_OBJ(node->data.flt.value), + }; + break; + } + case kExprNodeInteger: { + ret_node->items[ret_node->size++] = (KeyValuePair) { + .key = STATIC_CSTR_TO_STRING("ivalue"), + .value = INTEGER_OBJ((Integer)( + node->data.num.value > API_INTEGER_MAX + ? API_INTEGER_MAX + : (Integer)node->data.num.value)), + }; + break; + } + case kExprNodeAssignment: { + const ExprAssignmentType asgn_type = node->data.ass.type; + ret_node->items[ret_node->size++] = (KeyValuePair) { + .key = STATIC_CSTR_TO_STRING("augmentation"), + .value = STRING_OBJ( + asgn_type == kExprAsgnPlain + ? (String)STRING_INIT + : cstr_to_string(expr_asgn_type_tab[asgn_type])), + }; + break; + } + case kExprNodeMissing: + case kExprNodeOpMissing: + case kExprNodeTernary: + case kExprNodeTernaryValue: + case kExprNodeSubscript: + case kExprNodeListLiteral: + case kExprNodeUnaryPlus: + case kExprNodeBinaryPlus: + case kExprNodeNested: + case kExprNodeCall: + case kExprNodeComplexIdentifier: + case kExprNodeUnknownFigure: + case kExprNodeLambda: + case kExprNodeDictLiteral: + case kExprNodeCurlyBracesIdentifier: + case kExprNodeComma: + case kExprNodeColon: + case kExprNodeArrow: + case kExprNodeConcat: + case kExprNodeConcatOrSubscript: + case kExprNodeOr: + case kExprNodeAnd: + case kExprNodeUnaryMinus: + case kExprNodeBinaryMinus: + case kExprNodeNot: + case kExprNodeMultiplication: + case kExprNodeDivision: + case kExprNodeMod: { + break; + } + } + assert(cur_item.ret_node_p->data.dictionary.size + == cur_item.ret_node_p->data.dictionary.capacity); + xfree(*cur_item.node_p); + *cur_item.node_p = NULL; + } + } + } + kvi_destroy(ast_conv_stack); + + assert(ret.size == ret.capacity); + // Should be a no-op actually, leaving it in case non-nodes will need to be + // freed later. + viml_pexpr_free_ast(east); + viml_parser_destroy(&pstate); + return ret; +} + /// Writes a message to vim output or error buffer. The string is split /// and flushed after each newline. Incomplete lines are kept for writing diff --git a/src/nvim/ascii.h b/src/nvim/ascii.h index adde91f9ec..9ccb70764d 100644 --- a/src/nvim/ascii.h +++ b/src/nvim/ascii.h @@ -3,6 +3,7 @@ #include <stdbool.h> +#include "nvim/macros.h" #include "nvim/func_attr.h" #include "nvim/os/os_defs.h" @@ -98,6 +99,10 @@ static inline bool ascii_isxdigit(int) REAL_FATTR_CONST REAL_FATTR_ALWAYS_INLINE; +static inline bool ascii_isident(int) + REAL_FATTR_CONST + REAL_FATTR_ALWAYS_INLINE; + static inline bool ascii_isbdigit(int) REAL_FATTR_CONST REAL_FATTR_ALWAYS_INLINE; @@ -138,6 +143,14 @@ static inline bool ascii_isxdigit(int c) || (c >= 'A' && c <= 'F'); } +/// Checks if `c` is an “identifier” character +/// +/// That is, whether it is alphanumeric character or underscore. +static inline bool ascii_isident(const int c) +{ + return ASCII_ISALNUM(c) || c == '_'; +} + /// Checks if `c` is a binary digit, that is, 0-1. /// /// @see {ascii_isdigit} diff --git a/src/nvim/charset.c b/src/nvim/charset.c index 577fc13a31..980b4ed426 100644 --- a/src/nvim/charset.c +++ b/src/nvim/charset.c @@ -1611,141 +1611,145 @@ bool vim_isblankline(char_u *lbuf) /// If maxlen > 0, check at a maximum maxlen chars. /// /// @param start -/// @param prep Returns type of number 0 = decimal, 'x' or 'X' is hex, -/// '0' = octal, 'b' or 'B' is bin +/// @param prep Returns guessed type of number 0 = decimal, 'x' or 'X' is +/// hexadecimal, '0' = octal, 'b' or 'B' is binary. When using +/// STR2NR_FORCE is always zero. /// @param len Returns the detected length of number. -/// @param what Recognizes what number passed. +/// @param what Recognizes what number passed, @see ChStr2NrFlags. /// @param nptr Returns the signed result. /// @param unptr Returns the unsigned result. /// @param maxlen Max length of string to check. void vim_str2nr(const char_u *const start, int *const prep, int *const len, const int what, varnumber_T *const nptr, uvarnumber_T *const unptr, const int maxlen) + FUNC_ATTR_NONNULL_ARG(1) { - const char_u *ptr = start; + const char *ptr = (const char *)start; +#define STRING_ENDED(ptr) \ + (!(maxlen == 0 || (int)((ptr) - (const char *)start) < maxlen)) int pre = 0; // default is decimal - bool negative = false; + const bool negative = (ptr[0] == '-'); uvarnumber_T un = 0; - if (ptr[0] == '-') { - negative = true; + if (negative) { ptr++; } - // Recognize hex, octal and bin. - if ((ptr[0] == '0') && (ptr[1] != '8') && (ptr[1] != '9') - && (maxlen == 0 || maxlen > 1)) { + if (what & STR2NR_FORCE) { + // When forcing main consideration is skipping the prefix. Octal and decimal + // numbers have no prefixes to skip. pre is not set. + switch ((unsigned)what & (~(unsigned)STR2NR_FORCE)) { + case STR2NR_HEX: { + if (!STRING_ENDED(ptr + 2) + && ptr[0] == '0' + && (ptr[1] == 'x' || ptr[1] == 'X') + && ascii_isxdigit(ptr[2])) { + ptr += 2; + } + goto vim_str2nr_hex; + } + case STR2NR_BIN: { + if (!STRING_ENDED(ptr + 2) + && ptr[0] == '0' + && (ptr[1] == 'b' || ptr[1] == 'B') + && ascii_isbdigit(ptr[2])) { + ptr += 2; + } + goto vim_str2nr_bin; + } + case STR2NR_OCT: { + goto vim_str2nr_oct; + } + case 0: { + goto vim_str2nr_dec; + } + default: { + assert(false); + } + } + } else if ((what & (STR2NR_HEX|STR2NR_OCT|STR2NR_BIN)) + && !STRING_ENDED(ptr + 1) + && ptr[0] == '0' && ptr[1] != '8' && ptr[1] != '9') { pre = ptr[1]; - + // Detect hexadecimal: 0x or 0X follwed by hex digit if ((what & STR2NR_HEX) - && ((pre == 'X') || (pre == 'x')) - && ascii_isxdigit(ptr[2]) - && (maxlen == 0 || maxlen > 2)) { - // hexadecimal + && !STRING_ENDED(ptr + 2) + && (pre == 'X' || pre == 'x') + && ascii_isxdigit(ptr[2])) { ptr += 2; - } else if ((what & STR2NR_BIN) - && ((pre == 'B') || (pre == 'b')) - && ascii_isbdigit(ptr[2]) - && (maxlen == 0 || maxlen > 2)) { - // binary + goto vim_str2nr_hex; + } + // Detect binary: 0b or 0B follwed by 0 or 1 + if ((what & STR2NR_BIN) + && !STRING_ENDED(ptr + 2) + && (pre == 'B' || pre == 'b') + && ascii_isbdigit(ptr[2])) { ptr += 2; - } else { - // decimal or octal, default is decimal - pre = 0; - - if (what & STR2NR_OCT) { - // Don't interpret "0", "08" or "0129" as octal. - for (int n = 1; ascii_isdigit(ptr[n]); ++n) { - if (ptr[n] > '7') { - // can't be octal - pre = 0; - break; - } - if (ptr[n] >= '0') { - // assume octal - pre = '0'; - } - if (n == maxlen) { - break; - } - } + goto vim_str2nr_bin; + } + // Detect octal number: zero followed by octal digits without '8' or '9' + pre = 0; + if (!(what & STR2NR_OCT) + || !('0' <= ptr[1] && ptr[1] <= '7')) { + goto vim_str2nr_dec; + } + for (int i = 2; !STRING_ENDED(ptr + i) && ascii_isdigit(ptr[i]); i++) { + if (ptr[i] > '7') { + goto vim_str2nr_dec; } } + pre = '0'; + goto vim_str2nr_oct; + } else { + goto vim_str2nr_dec; } // Do the string-to-numeric conversion "manually" to avoid sscanf quirks. - int n = 1; - if ((pre == 'B') || (pre == 'b') || what == STR2NR_BIN + STR2NR_FORCE) { - // bin - if (pre != 0) { - n += 2; // skip over "0b" + assert(false); // Should’ve used goto earlier. +#define PARSE_NUMBER(base, cond, conv) \ + do { \ + while (!STRING_ENDED(ptr) && (cond)) { \ + /* avoid ubsan error for overflow */ \ + if (un < UVARNUMBER_MAX / base) { \ + un = base * un + (uvarnumber_T)(conv); \ + } else { \ + un = UVARNUMBER_MAX; \ + } \ + ptr++; \ + } \ + } while (0) + switch (pre) { + case 'b': + case 'B': { +vim_str2nr_bin: + PARSE_NUMBER(2, (*ptr == '0' || *ptr == '1'), (*ptr - '0')); + break; } - while ('0' <= *ptr && *ptr <= '1') { - // avoid ubsan error for overflow - if (un < UVARNUMBER_MAX / 2) { - un = 2 * un + (uvarnumber_T)(*ptr - '0'); - } else { - un = UVARNUMBER_MAX; - } - ptr++; - if (n++ == maxlen) { - break; - } - } - } else if ((pre == '0') || what == STR2NR_OCT + STR2NR_FORCE) { - // octal - while ('0' <= *ptr && *ptr <= '7') { - // avoid ubsan error for overflow - if (un < UVARNUMBER_MAX / 8) { - un = 8 * un + (uvarnumber_T)(*ptr - '0'); - } else { - un = UVARNUMBER_MAX; - } - ptr++; - if (n++ == maxlen) { - break; - } + case '0': { +vim_str2nr_oct: + PARSE_NUMBER(8, ('0' <= *ptr && *ptr <= '7'), (*ptr - '0')); + break; } - } else if ((pre == 'X') || (pre == 'x') - || what == STR2NR_HEX + STR2NR_FORCE) { - // hex - if (pre != 0) { - n += 2; // skip over "0x" - } - while (ascii_isxdigit(*ptr)) { - // avoid ubsan error for overflow - if (un < UVARNUMBER_MAX / 16) { - un = 16 * un + (uvarnumber_T)hex2nr(*ptr); - } else { - un = UVARNUMBER_MAX; - } - ptr++; - if (n++ == maxlen) { - break; - } + case 0: { +vim_str2nr_dec: + PARSE_NUMBER(10, (ascii_isdigit(*ptr)), (*ptr - '0')); + break; } - } else { - // decimal - while (ascii_isdigit(*ptr)) { - // avoid ubsan error for overflow - if (un < UVARNUMBER_MAX / 10) { - un = 10 * un + (uvarnumber_T)(*ptr - '0'); - } else { - un = UVARNUMBER_MAX; - } - ptr++; - if (n++ == maxlen) { - break; - } + case 'x': + case 'X': { +vim_str2nr_hex: + PARSE_NUMBER(16, (ascii_isxdigit(*ptr)), (hex2nr(*ptr))); + break; } } +#undef PARSE_NUMBER if (prep != NULL) { *prep = pre; } if (len != NULL) { - *len = (int)(ptr - start); + *len = (int)(ptr - (const char *)start); } if (nptr != NULL) { @@ -1767,6 +1771,7 @@ void vim_str2nr(const char_u *const start, int *const prep, int *const len, if (unptr != NULL) { *unptr = un; } +#undef STRING_ENDED } /// Return the value of a single hex character. diff --git a/src/nvim/charset.h b/src/nvim/charset.h index c69582c4c6..eb64b6128a 100644 --- a/src/nvim/charset.h +++ b/src/nvim/charset.h @@ -4,6 +4,7 @@ #include "nvim/types.h" #include "nvim/pos.h" #include "nvim/buffer_defs.h" +#include "nvim/eval/typval.h" /// Return the folded-case equivalent of the given character /// @@ -15,6 +16,21 @@ ?((int)(uint8_t)(c)) \ :((int)(c))) +/// Flags for vim_str2nr() +typedef enum { + STR2NR_DEC = 0, + STR2NR_BIN = (1 << 0), ///< Allow binary numbers. + STR2NR_OCT = (1 << 1), ///< Allow octal numbers. + STR2NR_HEX = (1 << 2), ///< Allow hexadecimal numbers. + /// Force one of the above variants. + /// + /// STR2NR_FORCE|STR2NR_DEC is actually not different from supplying zero + /// as flags, but still present for completeness. + STR2NR_FORCE = (1 << 3), + /// Recognize all formats vim_str2nr() can recognize. + STR2NR_ALL = STR2NR_BIN | STR2NR_OCT | STR2NR_HEX, +} ChStr2NrFlags; + #ifdef INCLUDE_GENERATED_DECLARATIONS # include "charset.h.generated.h" #endif diff --git a/src/nvim/edit.c b/src/nvim/edit.c index 2bafb77fef..859f98d2ad 100644 --- a/src/nvim/edit.c +++ b/src/nvim/edit.c @@ -6066,27 +6066,30 @@ void free_last_insert(void) #endif -/* - * Add character "c" to buffer "s". Escape the special meaning of K_SPECIAL - * and CSI. Handle multi-byte characters. - * Returns a pointer to after the added bytes. - */ +/// Add character "c" to buffer "s" +/// +/// Escapes the special meaning of K_SPECIAL and CSI, handles multi-byte +/// characters. +/// +/// @param[in] c Character to add. +/// @param[out] s Buffer to add to. Must have at least MB_MAXBYTES + 1 bytes. +/// +/// @return Pointer to after the added bytes. char_u *add_char2buf(int c, char_u *s) + FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT { char_u temp[MB_MAXBYTES + 1]; - int i; - int len; - - len = (*mb_char2bytes)(c, temp); - for (i = 0; i < len; ++i) { + const int len = utf_char2bytes(c, temp); + for (int i = 0; i < len; i++) { c = temp[i]; - /* Need to escape K_SPECIAL and CSI like in the typeahead buffer. */ + // Need to escape K_SPECIAL and CSI like in the typeahead buffer. if (c == K_SPECIAL) { *s++ = K_SPECIAL; *s++ = KS_SPECIAL; *s++ = KE_FILLER; - } else + } else { *s++ = c; + } } return s; } diff --git a/src/nvim/ex_docmd.c b/src/nvim/ex_docmd.c index e11788531b..f6a5f59676 100644 --- a/src/nvim/ex_docmd.c +++ b/src/nvim/ex_docmd.c @@ -5925,9 +5925,10 @@ static void ex_colorscheme(exarg_T *eap) static void ex_highlight(exarg_T *eap) { - if (*eap->arg == NUL && eap->cmd[2] == '!') + if (*eap->arg == NUL && eap->cmd[2] == '!') { MSG(_("Greetings, Vim user!")); - do_highlight(eap->arg, eap->forceit, FALSE); + } + do_highlight((const char *)eap->arg, eap->forceit, false); } diff --git a/src/nvim/ex_getln.c b/src/nvim/ex_getln.c index 99330c177c..1adc8325f8 100644 --- a/src/nvim/ex_getln.c +++ b/src/nvim/ex_getln.c @@ -66,6 +66,8 @@ #include "nvim/lib/kvec.h" #include "nvim/api/private/helpers.h" #include "nvim/highlight_defs.h" +#include "nvim/viml/parser/parser.h" +#include "nvim/viml/parser/expressions.h" /// Command-line colors: one chunk /// @@ -2428,6 +2430,63 @@ void free_cmdline_buf(void) enum { MAX_CB_ERRORS = 1 }; +/// Color expression cmdline using built-in expressions parser +/// +/// @param[in] colored_ccline Command-line to color. +/// @param[out] ret_ccline_colors What should be colored. +/// +/// Always colors the whole cmdline. +static void color_expr_cmdline(const CmdlineInfo *const colored_ccline, + ColoredCmdline *const ret_ccline_colors) + FUNC_ATTR_NONNULL_ALL +{ + ParserLine plines[] = { + { + .data = (const char *)colored_ccline->cmdbuff, + .size = STRLEN(colored_ccline->cmdbuff), + .allocated = false, + }, + { NULL, 0, false }, + }; + ParserLine *plines_p = plines; + ParserHighlight colors; + kvi_init(colors); + ParserState pstate; + viml_parser_init( + &pstate, parser_simple_get_line, &plines_p, &colors); + ExprAST east = viml_pexpr_parse(&pstate, kExprFlagsDisallowEOC); + viml_pexpr_free_ast(east); + viml_parser_destroy(&pstate); + kv_resize(ret_ccline_colors->colors, kv_size(colors)); + size_t prev_end = 0; + for (size_t i = 0 ; i < kv_size(colors) ; i++) { + const ParserHighlightChunk chunk = kv_A(colors, i); + if (chunk.start.col != prev_end) { + kv_push(ret_ccline_colors->colors, ((CmdlineColorChunk) { + .start = prev_end, + .end = chunk.start.col, + .attr = 0, + })); + } + const int id = syn_name2id((const char_u *)chunk.group); + const int attr = (id == 0 ? 0 : syn_id2attr(id)); + kv_push(ret_ccline_colors->colors, ((CmdlineColorChunk) { + .start = chunk.start.col, + .end = chunk.end_col, + .attr = attr, + })); + prev_end = chunk.end_col; + } + if (prev_end < (size_t)colored_ccline->cmdlen) { + kv_push(ret_ccline_colors->colors, ((CmdlineColorChunk) { + .start = prev_end, + .end = (size_t)colored_ccline->cmdlen, + .attr = 0, + })); + } + kvi_destroy(colors); +} + /// Color command-line /// /// Should use built-in command parser or user-specified one. Currently only the @@ -2510,13 +2569,7 @@ static bool color_cmdline(CmdlineInfo *colored_ccline) tl_ret = try_leave(&tstate, &err); can_free_cb = true; } else if (colored_ccline->cmdfirstc == '=') { - try_enter(&tstate); - err_errmsg = N_( - "E5409: Unable to get g:Nvim_color_expr callback: %s"); - dgc_ret = tv_dict_get_callback(&globvardict, S_LEN("Nvim_color_expr"), - &color_cb); - tl_ret = try_leave(&tstate, &err); - can_free_cb = true; + color_expr_cmdline(colored_ccline, ccline_colors); } if (!tl_ret || !dgc_ret) { goto color_cmdline_error; diff --git a/src/nvim/generators/gen_declarations.lua b/src/nvim/generators/gen_declarations.lua index e999e53e4a..065c043557 100755 --- a/src/nvim/generators/gen_declarations.lua +++ b/src/nvim/generators/gen_declarations.lua @@ -164,9 +164,40 @@ local pattern = concat( ) if fname == '--help' then - print'Usage:' - print() - print' gendeclarations.lua definitions.c static.h non-static.h preprocessor.i' + print([[ +Usage: + + gendeclarations.lua definitions.c static.h non-static.h definitions.i + +Generates declarations for a C file definitions.c, putting declarations for +static functions into static.h and declarations for non-static functions into +non-static.h. File `definitions.i' should contain an already preprocessed +version of definitions.c and it is the only one which is actually parsed, +definitions.c is needed only to determine functions from which file out of all +functions found in definitions.i are needed. + +Additionally uses the following environment variables: + + NVIM_GEN_DECLARATIONS_LINE_NUMBERS: + If set to 1 then all generated declarations receive a comment with file + name and line number after the declaration. This may be useful for + debugging gen_declarations script, but not much beyond that with + configured development environment (i.e. with ctags/cscope/finding + definitions with clang/etc). + + WARNING: setting this to 1 will cause extensive rebuilds: declarations + generator script will not regenerate non-static.h file if its + contents did not change, but including line numbers will make + contents actually change. + + With contents changed timestamp of the file is regenerated even + when no real changes were made (e.g. a few lines were added to + a function which is not at the bottom of the file). + + With changed timestamp build system will assume that header + changed, triggering rebuilds of all C files which depend on the + "changed" header. +]]) os.exit() end @@ -249,8 +280,10 @@ while init ~= nil do declaration = declaration:gsub(' $', '') declaration = declaration:gsub('^ ', '') declaration = declaration .. ';' - declaration = declaration .. (' // %s/%s:%u'):format( - curdir, curfile, declline) + if os.getenv('NVIM_GEN_DECLARATIONS_LINE_NUMBERS') == '1' then + declaration = declaration .. (' // %s/%s:%u'):format( + curdir, curfile, declline) + end declaration = declaration .. '\n' if declaration:sub(1, 6) == 'static' then static = static .. declaration diff --git a/src/nvim/globals.h b/src/nvim/globals.h index dcb8b40973..ef56954aa0 100644 --- a/src/nvim/globals.h +++ b/src/nvim/globals.h @@ -729,29 +729,6 @@ EXTERN int vr_lines_changed INIT(= 0); /* #Lines changed by "gR" so far */ /// Encoding used when 'fencs' is set to "default" EXTERN char_u *fenc_default INIT(= NULL); -// To speed up BYTELEN(); keep a lookup table to quickly get the length in -// bytes of a UTF-8 character from the first byte of a UTF-8 string. Bytes -// which are illegal when used as the first byte have a 1. The NUL byte has -// length 1. -EXTERN char utf8len_tab[256] INIT(= { - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1, -}); - # if defined(USE_ICONV) && defined(DYNAMIC_ICONV) /* Pointers to functions and variables to be loaded at runtime */ EXTERN size_t (*iconv)(iconv_t cd, const char **inbuf, size_t *inbytesleft, diff --git a/src/nvim/keymap.c b/src/nvim/keymap.c index a75fe793ac..0eb5a41b1e 100644 --- a/src/nvim/keymap.c +++ b/src/nvim/keymap.c @@ -24,23 +24,22 @@ * Some useful tables. */ -static struct modmasktable { - short mod_mask; /* Bit-mask for particular key modifier */ - short mod_flag; /* Bit(s) for particular key modifier */ - char_u name; /* Single letter name of modifier */ -} mod_mask_table[] = -{ - {MOD_MASK_ALT, MOD_MASK_ALT, (char_u)'M'}, - {MOD_MASK_META, MOD_MASK_META, (char_u)'T'}, - {MOD_MASK_CTRL, MOD_MASK_CTRL, (char_u)'C'}, - {MOD_MASK_SHIFT, MOD_MASK_SHIFT, (char_u)'S'}, - {MOD_MASK_MULTI_CLICK, MOD_MASK_2CLICK, (char_u)'2'}, - {MOD_MASK_MULTI_CLICK, MOD_MASK_3CLICK, (char_u)'3'}, - {MOD_MASK_MULTI_CLICK, MOD_MASK_4CLICK, (char_u)'4'}, - {MOD_MASK_CMD, MOD_MASK_CMD, (char_u)'D'}, +static const struct modmasktable { + uint16_t mod_mask; ///< Bit-mask for particular key modifier. + uint16_t mod_flag; ///< Bit(s) for particular key modifier. + char_u name; ///< Single letter name of modifier. +} mod_mask_table[] = { + { MOD_MASK_ALT, MOD_MASK_ALT, (char_u)'M' }, + { MOD_MASK_META, MOD_MASK_META, (char_u)'T' }, + { MOD_MASK_CTRL, MOD_MASK_CTRL, (char_u)'C' }, + { MOD_MASK_SHIFT, MOD_MASK_SHIFT, (char_u)'S' }, + { MOD_MASK_MULTI_CLICK, MOD_MASK_2CLICK, (char_u)'2' }, + { MOD_MASK_MULTI_CLICK, MOD_MASK_3CLICK, (char_u)'3' }, + { MOD_MASK_MULTI_CLICK, MOD_MASK_4CLICK, (char_u)'4' }, + { MOD_MASK_CMD, MOD_MASK_CMD, (char_u)'D' }, // 'A' must be the last one - {MOD_MASK_ALT, MOD_MASK_ALT, (char_u)'A'}, - {0, 0, NUL} + { MOD_MASK_ALT, MOD_MASK_ALT, (char_u)'A' }, + { 0, 0, NUL } }; /* @@ -139,11 +138,10 @@ static char_u modifier_keys_table[] = NUL }; -static struct key_name_entry { +static const struct key_name_entry { int key; // Special key code or ascii value - char *name; // Name of key -} key_names_table[] = -{ + const char *name; // Name of key +} key_names_table[] = { { ' ', "Space" }, { TAB, "Tab" }, { K_TAB, "Tab" }, @@ -318,73 +316,73 @@ static struct mousetable { {0, 0, 0, 0}, }; -/* - * Return the modifier mask bit (MOD_MASK_*) which corresponds to the given - * modifier name ('S' for Shift, 'C' for Ctrl etc). - */ +/// Return the modifier mask bit (#MOD_MASK_*) corresponding to mod name +/// +/// E.g. 'S' for shift, 'C' for ctrl. int name_to_mod_mask(int c) + FUNC_ATTR_CONST FUNC_ATTR_WARN_UNUSED_RESULT { - int i; - c = TOUPPER_ASC(c); - for (i = 0; mod_mask_table[i].mod_mask != 0; i++) - if (c == mod_mask_table[i].name) + for (size_t i = 0; mod_mask_table[i].mod_mask != 0; i++) { + if (c == mod_mask_table[i].name) { return mod_mask_table[i].mod_flag; + } + } return 0; } -/* - * Check if if there is a special key code for "key" that includes the - * modifiers specified. - */ -int simplify_key(int key, int *modifiers) +/// Check if there is a special key code for "key" with specified modifiers +/// +/// @param[in] key Initial key code. +/// @param[in,out] modifiers Initial modifiers, is adjusted to have simplified +/// modifiers. +/// +/// @return Simplified key code. +int simplify_key(const int key, int *modifiers) + FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL { - int i; - int key0; - int key1; - if (*modifiers & (MOD_MASK_SHIFT | MOD_MASK_CTRL | MOD_MASK_ALT)) { - /* TAB is a special case */ + // TAB is a special case. if (key == TAB && (*modifiers & MOD_MASK_SHIFT)) { *modifiers &= ~MOD_MASK_SHIFT; return K_S_TAB; } - key0 = KEY2TERMCAP0(key); - key1 = KEY2TERMCAP1(key); - for (i = 0; modifier_keys_table[i] != NUL; i += MOD_KEYS_ENTRY_SIZE) + const int key0 = KEY2TERMCAP0(key); + const int key1 = KEY2TERMCAP1(key); + for (int i = 0; modifier_keys_table[i] != NUL; i += MOD_KEYS_ENTRY_SIZE) { if (key0 == modifier_keys_table[i + 3] && key1 == modifier_keys_table[i + 4] && (*modifiers & modifier_keys_table[i])) { *modifiers &= ~modifier_keys_table[i]; return TERMCAP2KEY(modifier_keys_table[i + 1], - modifier_keys_table[i + 2]); + modifier_keys_table[i + 2]); } + } } return key; } -/* - * Change <xHome> to <Home>, <xUp> to <Up>, etc. - */ -int handle_x_keys(int key) +/// Change <xKey> to <Key> +int handle_x_keys(const int key) + FUNC_ATTR_CONST FUNC_ATTR_WARN_UNUSED_RESULT { switch (key) { - case K_XUP: return K_UP; - case K_XDOWN: return K_DOWN; - case K_XLEFT: return K_LEFT; - case K_XRIGHT: return K_RIGHT; - case K_XHOME: return K_HOME; - case K_ZHOME: return K_HOME; - case K_XEND: return K_END; - case K_ZEND: return K_END; - case K_XF1: return K_F1; - case K_XF2: return K_F2; - case K_XF3: return K_F3; - case K_XF4: return K_F4; - case K_S_XF1: return K_S_F1; - case K_S_XF2: return K_S_F2; - case K_S_XF3: return K_S_F3; - case K_S_XF4: return K_S_F4; + case K_XUP: return K_UP; + case K_XDOWN: return K_DOWN; + case K_XLEFT: return K_LEFT; + case K_XRIGHT: return K_RIGHT; + case K_XHOME: return K_HOME; + case K_ZHOME: return K_HOME; + case K_XEND: return K_END; + case K_ZEND: return K_END; + case K_XF1: return K_F1; + case K_XF2: return K_F2; + case K_XF3: return K_F3; + case K_XF4: return K_F4; + case K_S_XF1: return K_S_F1; + case K_S_XF2: return K_S_F2; + case K_S_XF3: return K_S_F3; + case K_S_XF4: return K_S_F4; } return key; } @@ -508,7 +506,7 @@ unsigned int trans_special(const char_u **srcp, const size_t src_len, return 0; } - /* Put the appropriate modifier in a string */ + // Put the appropriate modifier in a string. if (modifiers != 0) { dst[dlen++] = K_SPECIAL; dst[dlen++] = KS_MODIFIER; @@ -569,15 +567,11 @@ int find_special_key(const char_u **srcp, const size_t src_len, int *const modp, // Find end of modifier list last_dash = src; - for (bp = src + 1; bp <= end && (*bp == '-' || vim_isIDc(*bp)); bp++) { + for (bp = src + 1; bp <= end && (*bp == '-' || ascii_isident(*bp)); bp++) { if (*bp == '-') { last_dash = bp; if (bp + 1 <= end) { - if (has_mbyte) { - l = mb_ptr2len_len(bp + 1, (int) (end - bp) + 1); - } else { - l = 1; - } + l = utfc_ptr2len_len(bp + 1, (int)(end - bp) + 1); // Anything accepted, like <C-?>. // <C-"> or <M-"> are not special in strings as " is // the string delimiter. With a backslash it works: <M-\"> @@ -702,33 +696,39 @@ int find_special_key_in_table(int c) { int i; - for (i = 0; key_names_table[i].name != NULL; i++) - if (c == key_names_table[i].key) + for (i = 0; key_names_table[i].name != NULL; i++) { + if (c == key_names_table[i].key) { break; - if (key_names_table[i].name == NULL) + } + } + if (key_names_table[i].name == NULL) { i = -1; + } return i; } -/* - * Find the special key with the given name (the given string does not have to - * end with NUL, the name is assumed to end before the first non-idchar). - * If the name starts with "t_" the next two characters are interpreted as a - * termcap name. - * Return the key code, or 0 if not found. - */ +/// Find the special key with the given name +/// +/// @param[in] name Name of the special. Does not have to end with NUL, it is +/// assumed to end before the first non-idchar. If name starts +/// with "t_" the next two characters are interpreted as +/// a termcap name. +/// +/// @return Key code or 0 if not found. int get_special_key_code(const char_u *name) + FUNC_ATTR_NONNULL_ALL FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT { - char *table_name; - int i, j; - - for (i = 0; key_names_table[i].name != NULL; i++) { - table_name = key_names_table[i].name; - for (j = 0; vim_isIDc(name[j]) && table_name[j] != NUL; j++) - if (TOLOWER_ASC(table_name[j]) != TOLOWER_ASC(name[j])) + for (int i = 0; key_names_table[i].name != NULL; i++) { + const char *const table_name = key_names_table[i].name; + int j; + for (j = 0; ascii_isident(name[j]) && table_name[j] != NUL; j++) { + if (TOLOWER_ASC(table_name[j]) != TOLOWER_ASC(name[j])) { break; - if (!vim_isIDc(name[j]) && table_name[j] == NUL) + } + } + if (!ascii_isident(name[j]) && table_name[j] == NUL) { return key_names_table[i].key; + } } return 0; diff --git a/src/nvim/lib/kvec.h b/src/nvim/lib/kvec.h index 584282d773..ee1b890cb9 100644 --- a/src/nvim/lib/kvec.h +++ b/src/nvim/lib/kvec.h @@ -62,7 +62,16 @@ #define kv_pop(v) ((v).items[--(v).size]) #define kv_size(v) ((v).size) #define kv_max(v) ((v).capacity) -#define kv_last(v) kv_A(v, kv_size(v) - 1) +#define kv_Z(v, i) kv_A(v, kv_size(v) - (i) - 1) +#define kv_last(v) kv_Z(v, 0) + +/// Drop last n items from kvec without resizing +/// +/// Previously spelled as `(void)kv_pop(v)`, repeated n times. +/// +/// @param[out] v Kvec to drop items from. +/// @param[in] n Number of elements to drop. +#define kv_drop(v, n) ((v).size -= (n)) #define kv_resize(v, s) \ ((v).capacity = (s), \ diff --git a/src/nvim/lib/ringbuf.h b/src/nvim/lib/ringbuf.h index 12b75ec65a..e63eae70b0 100644 --- a/src/nvim/lib/ringbuf.h +++ b/src/nvim/lib/ringbuf.h @@ -15,6 +15,7 @@ #ifndef NVIM_LIB_RINGBUF_H #define NVIM_LIB_RINGBUF_H +#include <stddef.h> #include <string.h> #include <assert.h> #include <stdint.h> @@ -73,6 +74,32 @@ typedef struct { \ RBType *buf_end; \ } TypeName##RingBuffer; +/// Dummy item free macros, for use in RINGBUF_INIT +/// +/// This macros actually does nothing. +/// +/// @param[in] item Item to be freed. +#define RINGBUF_DUMMY_FREE(item) + +/// Static ring buffer +/// +/// @warning Ring buffers created with this macros must neither be freed nor +/// deallocated. +/// +/// @param scope Ring buffer scope. +/// @param TypeName Ring buffer type name. +/// @param RBType Type of the single ring buffer element. +/// @param varname Variable name. +/// @param rbsize Ring buffer size. +#define RINGBUF_STATIC(scope, TypeName, RBType, varname, rbsize) \ +static RBType _##varname##_buf[rbsize]; \ +scope TypeName##RingBuffer varname = { \ + .buf = _##varname##_buf, \ + .next = _##varname##_buf, \ + .first = NULL, \ + .buf_end = _##varname##_buf + rbsize - 1, \ +}; + /// Initialize a new ring buffer /// /// @param TypeName Ring buffer type name. Actual type name will be diff --git a/src/nvim/mbyte.c b/src/nvim/mbyte.c index b24770a409..008bce6df6 100644 --- a/src/nvim/mbyte.c +++ b/src/nvim/mbyte.c @@ -72,19 +72,49 @@ struct interval { # include "unicode_tables.generated.h" #endif -/* - * Like utf8len_tab above, but using a zero for illegal lead bytes. - */ -const uint8_t utf8len_tab_zero[256] = -{ - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,0,0, +// To speed up BYTELEN(); keep a lookup table to quickly get the length in +// bytes of a UTF-8 character from the first byte of a UTF-8 string. Bytes +// which are illegal when used as the first byte have a 1. The NUL byte has +// length 1. +const uint8_t utf8len_tab[] = { + // ?1 ?2 ?3 ?4 ?5 ?6 ?7 ?8 ?9 ?A ?B ?C ?D ?E ?F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0? + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 1? + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 2? + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 3? + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 4? + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 5? + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 6? + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 7? + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 8? + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 9? + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // A? + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // B? + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // C? + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // D? + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // E? + 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1, // F? +}; + +// Like utf8len_tab above, but using a zero for illegal lead bytes. +const uint8_t utf8len_tab_zero[] = { + // ?1 ?2 ?3 ?4 ?5 ?6 ?7 ?8 ?9 ?A ?B ?C ?D ?E ?F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0? + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 1? + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 2? + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 3? + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 4? + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 5? + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 6? + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 7? + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8? + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9? + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // A? + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // B? + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // C? + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // D? + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // E? + 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0, // F? }; /* @@ -528,45 +558,52 @@ int utf_off2cells(unsigned off, unsigned max_off) return (off + 1 < max_off && ScreenLines[off + 1] == 0) ? 2 : 1; } -/* - * Convert a UTF-8 byte sequence to a wide character. - * If the sequence is illegal or truncated by a NUL the first byte is - * returned. - * Does not include composing characters, of course. - */ -int utf_ptr2char(const char_u *p) +/// Convert a UTF-8 byte sequence to a wide character +/// +/// If the sequence is illegal or truncated by a NUL then the first byte is +/// returned. Does not include composing characters for obvious reasons. +/// +/// @param[in] p String to convert. +/// +/// @return Unicode codepoint or byte value. +int utf_ptr2char(const char_u *const p) + FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT { - uint8_t len; - - if (p[0] < 0x80) /* be quick for ASCII */ + if (p[0] < 0x80) { // Be quick for ASCII. return p[0]; + } - len = utf8len_tab_zero[p[0]]; + const uint8_t len = utf8len_tab_zero[p[0]]; if (len > 1 && (p[1] & 0xc0) == 0x80) { - if (len == 2) + if (len == 2) { return ((p[0] & 0x1f) << 6) + (p[1] & 0x3f); + } if ((p[2] & 0xc0) == 0x80) { - if (len == 3) - return ((p[0] & 0x0f) << 12) + ((p[1] & 0x3f) << 6) - + (p[2] & 0x3f); + if (len == 3) { + return (((p[0] & 0x0f) << 12) + ((p[1] & 0x3f) << 6) + + (p[2] & 0x3f)); + } if ((p[3] & 0xc0) == 0x80) { - if (len == 4) - return ((p[0] & 0x07) << 18) + ((p[1] & 0x3f) << 12) - + ((p[2] & 0x3f) << 6) + (p[3] & 0x3f); + if (len == 4) { + return (((p[0] & 0x07) << 18) + ((p[1] & 0x3f) << 12) + + ((p[2] & 0x3f) << 6) + (p[3] & 0x3f)); + } if ((p[4] & 0xc0) == 0x80) { - if (len == 5) - return ((p[0] & 0x03) << 24) + ((p[1] & 0x3f) << 18) - + ((p[2] & 0x3f) << 12) + ((p[3] & 0x3f) << 6) - + (p[4] & 0x3f); - if ((p[5] & 0xc0) == 0x80 && len == 6) - return ((p[0] & 0x01) << 30) + ((p[1] & 0x3f) << 24) - + ((p[2] & 0x3f) << 18) + ((p[3] & 0x3f) << 12) - + ((p[4] & 0x3f) << 6) + (p[5] & 0x3f); + if (len == 5) { + return (((p[0] & 0x03) << 24) + ((p[1] & 0x3f) << 18) + + ((p[2] & 0x3f) << 12) + ((p[3] & 0x3f) << 6) + + (p[4] & 0x3f)); + } + if ((p[5] & 0xc0) == 0x80 && len == 6) { + return (((p[0] & 0x01) << 30) + ((p[1] & 0x3f) << 24) + + ((p[2] & 0x3f) << 18) + ((p[3] & 0x3f) << 12) + + ((p[4] & 0x3f) << 6) + (p[5] & 0x3f)); + } } } } } - /* Illegal value, just return the first byte */ + // Illegal value: just return the first byte. return p[0]; } @@ -767,23 +804,24 @@ int utfc_char2bytes(int off, char_u *buf) return len; } -/* - * Get the length of a UTF-8 byte sequence, not including any following - * composing characters. - * Returns 0 for "". - * Returns 1 for an illegal byte sequence. - */ -int utf_ptr2len(const char_u *p) +/// Get the length of a UTF-8 byte sequence representing a single codepoint +/// +/// @param[in] p UTF-8 string. +/// +/// @return Sequence length, 0 for empty string and 1 for non-UTF-8 byte +/// sequence. +int utf_ptr2len(const char_u *const p) + FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL { - int len; - int i; - - if (*p == NUL) + if (*p == NUL) { return 0; - len = utf8len_tab[*p]; - for (i = 1; i < len; ++i) - if ((p[i] & 0xc0) != 0x80) + } + const int len = utf8len_tab[*p]; + for (int i = 1; i < len; i++) { + if ((p[i] & 0xc0) != 0x80) { return 1; + } + } return len; } @@ -824,38 +862,38 @@ int utf_ptr2len_len(const char_u *p, int size) return len; } -/* - * Return the number of bytes the UTF-8 encoding of the character at "p" takes. - * This includes following composing characters. - */ -int utfc_ptr2len(const char_u *p) +/// Return the number of bytes occupied by a UTF-8 character in a string +/// +/// This includes following composing characters. +int utfc_ptr2len(const char_u *const p) + FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL { - int len; - int b0 = *p; - int prevlen; + uint8_t b0 = (uint8_t)(*p); - if (b0 == NUL) + if (b0 == NUL) { return 0; - if (b0 < 0x80 && p[1] < 0x80) /* be quick for ASCII */ + } + if (b0 < 0x80 && p[1] < 0x80) { // be quick for ASCII return 1; + } - /* Skip over first UTF-8 char, stopping at a NUL byte. */ - len = utf_ptr2len(p); + // Skip over first UTF-8 char, stopping at a NUL byte. + int len = utf_ptr2len(p); - /* Check for illegal byte. */ - if (len == 1 && b0 >= 0x80) + // Check for illegal byte. + if (len == 1 && b0 >= 0x80) { return 1; + } - /* - * Check for composing characters. We can handle only the first six, but - * skip all of them (otherwise the cursor would get stuck). - */ - prevlen = 0; - for (;; ) { - if (p[len] < 0x80 || !UTF_COMPOSINGLIKE(p + prevlen, p + len)) + // Check for composing characters. We can handle only the first six, but + // skip all of them (otherwise the cursor would get stuck). + int prevlen = 0; + for (;;) { + if (p[len] < 0x80 || !UTF_COMPOSINGLIKE(p + prevlen, p + len)) { return len; + } - /* Skip over composing char */ + // Skip over composing char. prevlen = len; len += utf_ptr2len(p + len); } @@ -913,23 +951,22 @@ int utfc_ptr2len_len(const char_u *p, int size) return len; } -/* - * Return the number of bytes the UTF-8 encoding of character "c" takes. - * This does not include composing characters. - */ -int utf_char2len(int c) +/// Determine how many bytes certain unicode codepoint will occupy +int utf_char2len(const int c) { - if (c < 0x80) + if (c < 0x80) { return 1; - if (c < 0x800) + } else if (c < 0x800) { return 2; - if (c < 0x10000) + } else if (c < 0x10000) { return 3; - if (c < 0x200000) + } else if (c < 0x200000) { return 4; - if (c < 0x4000000) + } else if (c < 0x4000000) { return 5; - return 6; + } else { + return 6; + } } /// Convert Unicode character to UTF-8 string @@ -937,46 +974,42 @@ int utf_char2len(int c) /// @param c character to convert to \p buf /// @param[out] buf UTF-8 string generated from \p c, does not add \0 /// @return Number of bytes (1-6). Does not include composing characters. -int utf_char2bytes(int c, char_u *const buf) +int utf_char2bytes(const int c, char_u *const buf) { - if (c < 0x80) { /* 7 bits */ + if (c < 0x80) { // 7 bits buf[0] = c; return 1; - } - if (c < 0x800) { /* 11 bits */ + } else if (c < 0x800) { // 11 bits buf[0] = 0xc0 + ((unsigned)c >> 6); buf[1] = 0x80 + (c & 0x3f); return 2; - } - if (c < 0x10000) { /* 16 bits */ + } else if (c < 0x10000) { // 16 bits buf[0] = 0xe0 + ((unsigned)c >> 12); buf[1] = 0x80 + (((unsigned)c >> 6) & 0x3f); buf[2] = 0x80 + (c & 0x3f); return 3; - } - if (c < 0x200000) { /* 21 bits */ + } else if (c < 0x200000) { // 21 bits buf[0] = 0xf0 + ((unsigned)c >> 18); buf[1] = 0x80 + (((unsigned)c >> 12) & 0x3f); buf[2] = 0x80 + (((unsigned)c >> 6) & 0x3f); buf[3] = 0x80 + (c & 0x3f); return 4; - } - if (c < 0x4000000) { /* 26 bits */ + } else if (c < 0x4000000) { // 26 bits buf[0] = 0xf8 + ((unsigned)c >> 24); buf[1] = 0x80 + (((unsigned)c >> 18) & 0x3f); buf[2] = 0x80 + (((unsigned)c >> 12) & 0x3f); buf[3] = 0x80 + (((unsigned)c >> 6) & 0x3f); buf[4] = 0x80 + (c & 0x3f); return 5; + } else { // 31 bits + buf[0] = 0xfc + ((unsigned)c >> 30); + buf[1] = 0x80 + (((unsigned)c >> 24) & 0x3f); + buf[2] = 0x80 + (((unsigned)c >> 18) & 0x3f); + buf[3] = 0x80 + (((unsigned)c >> 12) & 0x3f); + buf[4] = 0x80 + (((unsigned)c >> 6) & 0x3f); + buf[5] = 0x80 + (c & 0x3f); + return 6; } - /* 31 bits */ - buf[0] = 0xfc + ((unsigned)c >> 30); - buf[1] = 0x80 + (((unsigned)c >> 24) & 0x3f); - buf[2] = 0x80 + (((unsigned)c >> 18) & 0x3f); - buf[3] = 0x80 + (((unsigned)c >> 12) & 0x3f); - buf[4] = 0x80 + (((unsigned)c >> 6) & 0x3f); - buf[5] = 0x80 + (c & 0x3f); - return 6; } /* @@ -1513,14 +1546,15 @@ int utf_head_off(const char_u *base, const char_u *p) return (int)(p - q); } -/* - * Copy a character from "*fp" to "*tp" and advance the pointers. - */ -void mb_copy_char(const char_u **fp, char_u **tp) +/// Copy a character, advancing the pointers +/// +/// @param[in,out] fp Source of the character to copy. +/// @param[in,out] tp Destination to copy to. +void mb_copy_char(const char_u **const fp, char_u **const tp) { - int l = (*mb_ptr2len)(*fp); + const size_t l = (size_t)utfc_ptr2len(*fp); - memmove(*tp, *fp, (size_t)l); + memmove(*tp, *fp, l); *tp += l; *fp += l; } @@ -2262,9 +2296,7 @@ int convert_setup_ext(vimconv_T *vcp, char_u *from, bool from_unicode_is_utf8, if (vcp->vc_type == CONV_ICONV && vcp->vc_fd != (iconv_t)-1) iconv_close(vcp->vc_fd); # endif - vcp->vc_type = CONV_NONE; - vcp->vc_factor = 1; - vcp->vc_fail = false; + *vcp = (vimconv_T)MBYTE_NONE_CONV; /* No conversion when one of the names is empty or they are equal. */ if (from == NULL || *from == NUL || to == NULL || *to == NUL diff --git a/src/nvim/mbyte.h b/src/nvim/mbyte.h index bf6ccb13a5..a5ce1b0a15 100644 --- a/src/nvim/mbyte.h +++ b/src/nvim/mbyte.h @@ -60,6 +60,12 @@ typedef enum { CONV_ICONV = 5, } ConvFlags; +#define MBYTE_NONE_CONV { \ + .vc_type = CONV_NONE, \ + .vc_factor = 1, \ + .vc_fail = false, \ +} + /// Structure used for string conversions typedef struct { int vc_type; ///< Zero or more ConvFlags. @@ -73,6 +79,8 @@ typedef struct { extern const uint8_t utf8len_tab_zero[256]; +extern const uint8_t utf8len_tab[256]; + #ifdef INCLUDE_GENERATED_DECLARATIONS # include "mbyte.h.generated.h" #endif diff --git a/src/nvim/syntax.c b/src/nvim/syntax.c index bc7362af72..d1a5f0bd1c 100644 --- a/src/nvim/syntax.c +++ b/src/nvim/syntax.c @@ -5930,9 +5930,9 @@ static void syntime_report(void) // // When making changes here, also change runtime/colors/default.vim! -static char *highlight_init_both[] = -{ - "Conceal ctermbg=DarkGrey ctermfg=LightGrey guibg=DarkGrey guifg=LightGrey", +static const char *highlight_init_both[] = { + "Conceal " + "ctermbg=DarkGrey ctermfg=LightGrey guibg=DarkGrey guifg=LightGrey", "Cursor guibg=fg guifg=bg", "lCursor guibg=fg guifg=bg", "DiffText cterm=bold ctermbg=Red gui=bold guibg=Red", @@ -5955,8 +5955,7 @@ static char *highlight_init_both[] = NULL }; -static char *highlight_init_light[] = -{ +static const char *highlight_init_light[] = { "ColorColumn ctermbg=LightRed guibg=LightRed", "CursorColumn ctermbg=LightGrey guibg=Grey90", "CursorLine cterm=underline guibg=Grey90", @@ -5989,8 +5988,7 @@ static char *highlight_init_light[] = NULL }; -static char *highlight_init_dark[] = -{ +static const char *highlight_init_dark[] = { "ColorColumn ctermbg=DarkRed guibg=DarkRed", "CursorColumn ctermbg=DarkGrey guibg=Grey40", "CursorLine cterm=underline guibg=Grey40", @@ -6023,18 +6021,223 @@ static char *highlight_init_dark[] = NULL }; +const char *const highlight_init_cmdline[] = { + // XXX When modifying a list modify it in both valid and invalid halfs. + // TODO(ZyX-I): merge valid and invalid groups via a macros. + + // NvimInternalError should appear only when highlighter has a bug. + "NvimInternalError ctermfg=Red ctermbg=Red guifg=Red guibg=Red", + + // Highlight groups (links) used by parser: + + "default link NvimAssignment Operator", + "default link NvimPlainAssignment NvimAssignment", + "default link NvimAugmentedAssignment NvimAssignment", + "default link NvimAssignmentWithAddition NvimAugmentedAssignment", + "default link NvimAssignmentWithSubtraction NvimAugmentedAssignment", + "default link NvimAssignmentWithConcatenation NvimAugmentedAssignment", + + "default link NvimOperator Operator", + + "default link NvimUnaryOperator NvimOperator", + "default link NvimUnaryPlus NvimUnaryOperator", + "default link NvimUnaryMinus NvimUnaryOperator", + "default link NvimNot NvimUnaryOperator", + + "default link NvimBinaryOperator NvimOperator", + "default link NvimComparison NvimBinaryOperator", + "default link NvimComparisonModifier NvimComparison", + "default link NvimBinaryPlus NvimBinaryOperator", + "default link NvimBinaryMinus NvimBinaryOperator", + "default link NvimConcat NvimBinaryOperator", + "default link NvimConcatOrSubscript NvimConcat", + "default link NvimOr NvimBinaryOperator", + "default link NvimAnd NvimBinaryOperator", + "default link NvimMultiplication NvimBinaryOperator", + "default link NvimDivision NvimBinaryOperator", + "default link NvimMod NvimBinaryOperator", + + "default link NvimTernary NvimOperator", + "default link NvimTernaryColon NvimTernary", + + "default link NvimParenthesis Delimiter", + "default link NvimLambda NvimParenthesis", + "default link NvimNestingParenthesis NvimParenthesis", + "default link NvimCallingParenthesis NvimParenthesis", + + "default link NvimSubscript NvimParenthesis", + "default link NvimSubscriptBracket NvimSubscript", + "default link NvimSubscriptColon NvimSubscript", + "default link NvimCurly NvimSubscript", + + "default link NvimContainer NvimParenthesis", + "default link NvimDict NvimContainer", + "default link NvimList NvimContainer", + + "default link NvimIdentifier Identifier", + "default link NvimIdentifierScope NvimIdentifier", + "default link NvimIdentifierScopeDelimiter NvimIdentifier", + "default link NvimIdentifierName NvimIdentifier", + "default link NvimIdentifierKey NvimIdentifier", + + "default link NvimColon Delimiter", + "default link NvimComma Delimiter", + "default link NvimArrow Delimiter", + + "default link NvimRegister SpecialChar", + "default link NvimNumber Number", + "default link NvimFloat NvimNumber", + "default link NvimNumberPrefix Type", + + "default link NvimOptionSigil Type", + "default link NvimOptionName NvimIdentifier", + "default link NvimOptionScope NvimIdentifierScope", + "default link NvimOptionScopeDelimiter NvimIdentifierScopeDelimiter", + + "default link NvimEnvironmentSigil NvimOptionSigil", + "default link NvimEnvironmentName NvimIdentifier", + + "default link NvimString String", + "default link NvimStringBody NvimString", + "default link NvimStringQuote NvimString", + "default link NvimStringSpecial SpecialChar", + + "default link NvimSingleQuote NvimStringQuote", + "default link NvimSingleQuotedBody NvimStringBody", + "default link NvimSingleQuotedQuote NvimStringSpecial", + + "default link NvimDoubleQuote NvimStringQuote", + "default link NvimDoubleQuotedBody NvimStringBody", + "default link NvimDoubleQuotedEscape NvimStringSpecial", + + "default link NvimFigureBrace NvimInternalError", + "default link NvimSingleQuotedUnknownEscape NvimInternalError", + + "default link NvimSpacing Normal", + + // NvimInvalid groups: + + "default link NvimInvalidSingleQuotedUnknownEscape NvimInternalError", + + "default link NvimInvalid Error", + + "default link NvimInvalidAssignment NvimInvalid", + "default link NvimInvalidPlainAssignment NvimInvalidAssignment", + "default link NvimInvalidAugmentedAssignment NvimInvalidAssignment", + "default link NvimInvalidAssignmentWithAddition " + "NvimInvalidAugmentedAssignment", + "default link NvimInvalidAssignmentWithSubtraction " + "NvimInvalidAugmentedAssignment", + "default link NvimInvalidAssignmentWithConcatenation " + "NvimInvalidAugmentedAssignment", + + "default link NvimInvalidOperator NvimInvalid", + + "default link NvimInvalidUnaryOperator NvimInvalidOperator", + "default link NvimInvalidUnaryPlus NvimInvalidUnaryOperator", + "default link NvimInvalidUnaryMinus NvimInvalidUnaryOperator", + "default link NvimInvalidNot NvimInvalidUnaryOperator", + + "default link NvimInvalidBinaryOperator NvimInvalidOperator", + "default link NvimInvalidComparison NvimInvalidBinaryOperator", + "default link NvimInvalidComparisonModifier NvimInvalidComparison", + "default link NvimInvalidBinaryPlus NvimInvalidBinaryOperator", + "default link NvimInvalidBinaryMinus NvimInvalidBinaryOperator", + "default link NvimInvalidConcat NvimInvalidBinaryOperator", + "default link NvimInvalidConcatOrSubscript NvimInvalidConcat", + "default link NvimInvalidOr NvimInvalidBinaryOperator", + "default link NvimInvalidAnd NvimInvalidBinaryOperator", + "default link NvimInvalidMultiplication NvimInvalidBinaryOperator", + "default link NvimInvalidDivision NvimInvalidBinaryOperator", + "default link NvimInvalidMod NvimInvalidBinaryOperator", + + "default link NvimInvalidTernary NvimInvalidOperator", + "default link NvimInvalidTernaryColon NvimInvalidTernary", + + "default link NvimInvalidDelimiter NvimInvalid", + + "default link NvimInvalidParenthesis NvimInvalidDelimiter", + "default link NvimInvalidLambda NvimInvalidParenthesis", + "default link NvimInvalidNestingParenthesis NvimInvalidParenthesis", + "default link NvimInvalidCallingParenthesis NvimInvalidParenthesis", + + "default link NvimInvalidSubscript NvimInvalidParenthesis", + "default link NvimInvalidSubscriptBracket NvimInvalidSubscript", + "default link NvimInvalidSubscriptColon NvimInvalidSubscript", + "default link NvimInvalidCurly NvimInvalidSubscript", + + "default link NvimInvalidContainer NvimInvalidParenthesis", + "default link NvimInvalidDict NvimInvalidContainer", + "default link NvimInvalidList NvimInvalidContainer", + + "default link NvimInvalidValue NvimInvalid", + + "default link NvimInvalidIdentifier NvimInvalidValue", + "default link NvimInvalidIdentifierScope NvimInvalidIdentifier", + "default link NvimInvalidIdentifierScopeDelimiter NvimInvalidIdentifier", + "default link NvimInvalidIdentifierName NvimInvalidIdentifier", + "default link NvimInvalidIdentifierKey NvimInvalidIdentifier", + + "default link NvimInvalidColon NvimInvalidDelimiter", + "default link NvimInvalidComma NvimInvalidDelimiter", + "default link NvimInvalidArrow NvimInvalidDelimiter", + + "default link NvimInvalidRegister NvimInvalidValue", + "default link NvimInvalidNumber NvimInvalidValue", + "default link NvimInvalidFloat NvimInvalidNumber", + "default link NvimInvalidNumberPrefix NvimInvalidNumber", + + "default link NvimInvalidOptionSigil NvimInvalidIdentifier", + "default link NvimInvalidOptionName NvimInvalidIdentifier", + "default link NvimInvalidOptionScope NvimInvalidIdentifierScope", + "default link NvimInvalidOptionScopeDelimiter " + "NvimInvalidIdentifierScopeDelimiter", + + "default link NvimInvalidEnvironmentSigil NvimInvalidOptionSigil", + "default link NvimInvalidEnvironmentName NvimInvalidIdentifier", + + // Invalid string bodies and specials are still highlighted as valid ones to + // minimize the red area. + "default link NvimInvalidString NvimInvalidValue", + "default link NvimInvalidStringBody NvimStringBody", + "default link NvimInvalidStringQuote NvimInvalidString", + "default link NvimInvalidStringSpecial NvimStringSpecial", + + "default link NvimInvalidSingleQuote NvimInvalidStringQuote", + "default link NvimInvalidSingleQuotedBody NvimInvalidStringBody", + "default link NvimInvalidSingleQuotedQuote NvimInvalidStringSpecial", + + "default link NvimInvalidDoubleQuote NvimInvalidStringQuote", + "default link NvimInvalidDoubleQuotedBody NvimInvalidStringBody", + "default link NvimInvalidDoubleQuotedEscape NvimInvalidStringSpecial", + "default link NvimInvalidDoubleQuotedUnknownEscape NvimInvalidValue", + + "default link NvimInvalidFigureBrace NvimInvalidDelimiter", + + "default link NvimInvalidSpacing ErrorMsg", + + // Not actually invalid, but we highlight user that he is doing something + // wrong. + "default link NvimDoubleQuotedUnknownEscape NvimInvalidValue", + NULL, +}; + +/// Create default links for Nvim* highlight groups used for cmdline coloring +void syn_init_cmdline_highlight(bool reset, bool init) +{ + for (size_t i = 0 ; highlight_init_cmdline[i] != NULL ; i++) { + do_highlight(highlight_init_cmdline[i], reset, init); + } +} /// Load colors from a file if "g:colors_name" is set, otherwise load builtin /// colors /// /// @param both include groups where 'bg' doesn't matter /// @param reset clear groups first -void -init_highlight(int both, int reset) +void init_highlight(bool both, bool reset) { - int i; - char **pp; - static int had_both = FALSE; + static int had_both = false; // Try finding the color scheme file. Used when a color file was loaded // and 'background' or 't_Co' is changed. @@ -6054,10 +6257,10 @@ init_highlight(int both, int reset) * Didn't use a color file, use the compiled-in colors. */ if (both) { - had_both = TRUE; - pp = highlight_init_both; - for (i = 0; pp[i] != NULL; i++) { - do_highlight((char_u *)pp[i], reset, true); + had_both = true; + const char *const *const pp = highlight_init_both; + for (size_t i = 0; pp[i] != NULL; i++) { + do_highlight(pp[i], reset, true); } } else if (!had_both) { // Don't do anything before the call with both == TRUE from main(). @@ -6066,10 +6269,11 @@ init_highlight(int both, int reset) return; } - pp = (*p_bg == 'l') ? highlight_init_light : highlight_init_dark; - - for (i = 0; pp[i] != NULL; i++) { - do_highlight((char_u *)pp[i], reset, true); + const char *const *const pp = ((*p_bg == 'l') + ? highlight_init_light + : highlight_init_dark); + for (size_t i = 0; pp[i] != NULL; i++) { + do_highlight(pp[i], reset, true); } /* Reverse looks ugly, but grey may not work for 8 colors. Thus let it @@ -6079,15 +6283,14 @@ init_highlight(int both, int reset) * Clear the attributes, needed when changing the t_Co value. */ if (t_colors > 8) { do_highlight( - (char_u *)(*p_bg == 'l' - ? "Visual cterm=NONE ctermbg=LightGrey" - : "Visual cterm=NONE ctermbg=DarkGrey"), false, - true); + (*p_bg == 'l' + ? "Visual cterm=NONE ctermbg=LightGrey" + : "Visual cterm=NONE ctermbg=DarkGrey"), false, true); } else { - do_highlight((char_u *)"Visual cterm=reverse ctermbg=NONE", - FALSE, TRUE); - if (*p_bg == 'l') - do_highlight((char_u *)"Search ctermfg=black", FALSE, TRUE); + do_highlight("Visual cterm=reverse ctermbg=NONE", false, true); + if (*p_bg == 'l') { + do_highlight("Search ctermfg=black", false, true); + } } /* @@ -6104,6 +6307,7 @@ init_highlight(int both, int reset) recursive--; } } + syn_init_cmdline_highlight(false, false); } /* @@ -6138,17 +6342,22 @@ int load_colors(char_u *name) } -/// Handle the ":highlight .." command. -/// When using ":hi clear" this is called recursively for each group with -/// "forceit" and "init" both TRUE. -/// @param init TRUE when called for initializing -void -do_highlight(char_u *line, int forceit, int init) { - char_u *name_end; - char_u *linep; - char_u *key_start; - char_u *arg_start; - char_u *key = NULL, *arg = NULL; +/// Handle ":highlight" command +/// +/// When using ":highlight clear" this is called recursively for each group with +/// forceit and init being both true. +/// +/// @param[in] line Command arguments. +/// @param[in] forceit True when bang is given, allows to link group even if +/// it has its own settings. +/// @param[in] init True when initializing. +void do_highlight(const char *line, const bool forceit, const bool init) + FUNC_ATTR_NONNULL_ALL +{ + const char *name_end; + const char *linep; + const char *key_start; + const char *arg_start; long i; int off; int len; @@ -6162,94 +6371,87 @@ do_highlight(char_u *line, int forceit, int init) { int color; bool is_normal_group = false; // "Normal" group - /* - * If no argument, list current highlighting. - */ - if (ends_excmd(*line)) { + // If no argument, list current highlighting. + if (ends_excmd((uint8_t)(*line))) { for (int i = 1; i <= highlight_ga.ga_len && !got_int; i++) { - // todo(vim): only call when the group has attributes set + // TODO(brammool): only call when the group has attributes set highlight_list_one(i); } return; } - /* - * Isolate the name. - */ - name_end = skiptowhite(line); - linep = skipwhite(name_end); + // Isolate the name. + name_end = (const char *)skiptowhite((const char_u *)line); + linep = (const char *)skipwhite((const char_u *)name_end); - /* - * Check for "default" argument. - */ - if (STRNCMP(line, "default", name_end - line) == 0) { - dodefault = TRUE; + // Check for "default" argument. + if (strncmp(line, "default", name_end - line) == 0) { + dodefault = true; line = linep; - name_end = skiptowhite(line); - linep = skipwhite(name_end); + name_end = (const char *)skiptowhite((const char_u *)line); + linep = (const char *)skipwhite((const char_u *)name_end); } - /* - * Check for "clear" or "link" argument. - */ - if (STRNCMP(line, "clear", name_end - line) == 0) - doclear = TRUE; - if (STRNCMP(line, "link", name_end - line) == 0) - dolink = TRUE; + // Check for "clear" or "link" argument. + if (strncmp(line, "clear", name_end - line) == 0) { + doclear = true; + } else if (strncmp(line, "link", name_end - line) == 0) { + dolink = true; + } - /* - * ":highlight {group-name}": list highlighting for one group. - */ - if (!doclear && !dolink && ends_excmd(*linep)) { - id = syn_namen2id(line, (int)(name_end - line)); - if (id == 0) - EMSG2(_("E411: highlight group not found: %s"), line); - else + // ":highlight {group-name}": list highlighting for one group. + if (!doclear && !dolink && ends_excmd((uint8_t)(*linep))) { + id = syn_namen2id((const char_u *)line, (int)(name_end - line)); + if (id == 0) { + emsgf(_("E411: highlight group not found: %s"), line); + } else { highlight_list_one(id); + } return; } - /* - * Handle ":highlight link {from} {to}" command. - */ + // Handle ":highlight link {from} {to}" command. if (dolink) { - char_u *from_start = linep; - char_u *from_end; - char_u *to_start; - char_u *to_end; + const char *from_start = linep; + const char *from_end; + const char *to_start; + const char *to_end; int from_id; int to_id; - from_end = skiptowhite(from_start); - to_start = skipwhite(from_end); - to_end = skiptowhite(to_start); + from_end = (const char *)skiptowhite((const char_u *)from_start); + to_start = (const char *)skipwhite((const char_u *)from_end); + to_end = (const char *)skiptowhite((const char_u *)to_start); - if (ends_excmd(*from_start) || ends_excmd(*to_start)) { - EMSG2(_("E412: Not enough arguments: \":highlight link %s\""), - from_start); + if (ends_excmd((uint8_t)(*from_start)) + || ends_excmd((uint8_t)(*to_start))) { + emsgf(_("E412: Not enough arguments: \":highlight link %s\""), + from_start); return; } - if (!ends_excmd(*skipwhite(to_end))) { - EMSG2(_("E413: Too many arguments: \":highlight link %s\""), from_start); + if (!ends_excmd(*skipwhite((const char_u *)to_end))) { + emsgf(_("E413: Too many arguments: \":highlight link %s\""), from_start); return; } - from_id = syn_check_group(from_start, (int)(from_end - from_start)); - if (STRNCMP(to_start, "NONE", 4) == 0) + from_id = syn_check_group((const char_u *)from_start, + (int)(from_end - from_start)); + if (strncmp(to_start, "NONE", 4) == 0) { to_id = 0; - else - to_id = syn_check_group(to_start, (int)(to_end - to_start)); + } else { + to_id = syn_check_group((const char_u *)to_start, + (int)(to_end - to_start)); + } if (from_id > 0 && (!init || HL_TABLE()[from_id - 1].sg_set == 0)) { - /* - * Don't allow a link when there already is some highlighting - * for the group, unless '!' is used - */ + // Don't allow a link when there already is some highlighting + // for the group, unless '!' is used if (to_id > 0 && !forceit && !init && hl_has_settings(from_id - 1, dodefault)) { - if (sourcing_name == NULL && !dodefault) + if (sourcing_name == NULL && !dodefault) { EMSG(_("E414: group has settings, highlight link ignored")); + } } else { if (!init) HL_TABLE()[from_id - 1].sg_set |= SG_LINK; @@ -6259,43 +6461,38 @@ do_highlight(char_u *line, int forceit, int init) { } } - /* Only call highlight_changed() once, after sourcing a syntax file */ - need_highlight_changed = TRUE; + // Only call highlight_changed() once, after sourcing a syntax file. + need_highlight_changed = true; return; } if (doclear) { - /* - * ":highlight clear [group]" command. - */ + // ":highlight clear [group]" command. line = linep; - if (ends_excmd(*line)) { + if (ends_excmd((uint8_t)(*line))) { do_unlet(S_LEN("colors_name"), true); restore_cterm_colors(); - /* - * Clear all default highlight groups and load the defaults. - */ - for (int idx = 0; idx < highlight_ga.ga_len; ++idx) { + // Clear all default highlight groups and load the defaults. + for (int idx = 0; idx < highlight_ga.ga_len; idx++) { highlight_clear(idx); } - init_highlight(TRUE, TRUE); + init_highlight(true, true); highlight_changed(); redraw_later_clear(); return; } - name_end = skiptowhite(line); - linep = skipwhite(name_end); + name_end = (const char *)skiptowhite((const char_u *)line); + linep = (const char *)skipwhite((const char_u *)name_end); } - /* - * Find the group name in the table. If it does not exist yet, add it. - */ - id = syn_check_group(line, (int)(name_end - line)); - if (id == 0) /* failed (out of memory) */ + // Find the group name in the table. If it does not exist yet, add it. + id = syn_check_group((const char_u *)line, (int)(name_end - line)); + if (id == 0) { // Failed (out of memory). return; - idx = id - 1; /* index is ID minus one */ + } + idx = id - 1; // Index is ID minus one. // Return if "default" was used and the group already has settings if (dodefault && hl_has_settings(idx, true)) { @@ -6304,19 +6501,21 @@ do_highlight(char_u *line, int forceit, int init) { is_normal_group = (STRCMP(HL_TABLE()[idx].sg_name_u, "NORMAL") == 0); - /* Clear the highlighting for ":hi clear {group}" and ":hi clear". */ + // Clear the highlighting for ":hi clear {group}" and ":hi clear". if (doclear || (forceit && init)) { highlight_clear(idx); if (!doclear) HL_TABLE()[idx].sg_set = 0; } + char *key = NULL; + char *arg = NULL; if (!doclear) { - while (!ends_excmd(*linep)) { + while (!ends_excmd((uint8_t)(*linep))) { key_start = linep; if (*linep == '=') { - EMSG2(_("E415: unexpected equal sign: %s"), key_start); - error = TRUE; + emsgf(_("E415: unexpected equal sign: %s"), key_start); + error = true; break; } @@ -6326,61 +6525,58 @@ do_highlight(char_u *line, int forceit, int init) { linep++; } xfree(key); - key = vim_strnsave_up(key_start, (int)(linep - key_start)); - linep = skipwhite(linep); + key = (char *)vim_strnsave_up((const char_u *)key_start, + (int)(linep - key_start)); + linep = (const char *)skipwhite((const char_u *)linep); - if (STRCMP(key, "NONE") == 0) { + if (strcmp(key, "NONE") == 0) { if (!init || HL_TABLE()[idx].sg_set == 0) { - if (!init) + if (!init) { HL_TABLE()[idx].sg_set |= SG_CTERM+SG_GUI; + } highlight_clear(idx); } continue; } - /* - * Check for the equal sign. - */ + // Check for the equal sign. if (*linep != '=') { - EMSG2(_("E416: missing equal sign: %s"), key_start); - error = TRUE; + emsgf(_("E416: missing equal sign: %s"), key_start); + error = true; break; } - ++linep; + linep++; - /* - * Isolate the argument. - */ - linep = skipwhite(linep); - if (*linep == '\'') { /* guifg='color name' */ + // Isolate the argument. + linep = (const char *)skipwhite((const char_u *)linep); + if (*linep == '\'') { // guifg='color name' arg_start = ++linep; - linep = vim_strchr(linep, '\''); + linep = strchr(linep, '\''); if (linep == NULL) { - EMSG2(_(e_invarg2), key_start); - error = TRUE; + emsgf(_(e_invarg2), key_start); + error = true; break; } } else { arg_start = linep; - linep = skiptowhite(linep); + linep = (const char *)skiptowhite((const char_u *)linep); } if (linep == arg_start) { - EMSG2(_("E417: missing argument: %s"), key_start); - error = TRUE; + emsgf(_("E417: missing argument: %s"), key_start); + error = true; break; } xfree(arg); - arg = vim_strnsave(arg_start, (int)(linep - arg_start)); + arg = xstrndup(arg_start, (size_t)(linep - arg_start)); - if (*linep == '\'') - ++linep; + if (*linep == '\'') { + linep++; + } - /* - * Store the argument. - */ - if ( STRCMP(key, "TERM") == 0 - || STRCMP(key, "CTERM") == 0 - || STRCMP(key, "GUI") == 0) { + // Store the argument. + if (strcmp(key, "TERM") == 0 + || strcmp(key, "CTERM") == 0 + || strcmp(key, "GUI") == 0) { attr = 0; off = 0; while (arg[off] != NUL) { @@ -6393,26 +6589,30 @@ do_highlight(char_u *line, int forceit, int init) { } } if (i < 0) { - EMSG2(_("E418: Illegal value: %s"), arg); - error = TRUE; + emsgf(_("E418: Illegal value: %s"), arg); + error = true; break; } - if (arg[off] == ',') /* another one follows */ - ++off; + if (arg[off] == ',') { // Another one follows. + off++; + } } - if (error) + if (error) { break; + } if (*key == 'C') { if (!init || !(HL_TABLE()[idx].sg_set & SG_CTERM)) { - if (!init) + if (!init) { HL_TABLE()[idx].sg_set |= SG_CTERM; + } HL_TABLE()[idx].sg_cterm = attr; HL_TABLE()[idx].sg_cterm_bold = FALSE; } } else if (*key == 'G') { if (!init || !(HL_TABLE()[idx].sg_set & SG_GUI)) { - if (!init) + if (!init) { HL_TABLE()[idx].sg_set |= SG_GUI; + } HL_TABLE()[idx].sg_gui = attr; } } @@ -6431,14 +6631,14 @@ do_highlight(char_u *line, int forceit, int init) { HL_TABLE()[idx].sg_cterm_bold = FALSE; } - if (ascii_isdigit(*arg)) + if (ascii_isdigit(*arg)) { color = atoi((char *)arg); - else if (STRICMP(arg, "fg") == 0) { - if (cterm_normal_fg_color) + } else if (STRICMP(arg, "fg") == 0) { + if (cterm_normal_fg_color) { color = cterm_normal_fg_color - 1; - else { + } else { EMSG(_("E419: FG color unknown")); - error = TRUE; + error = true; break; } } else if (STRICMP(arg, "bg") == 0) { @@ -6446,70 +6646,84 @@ do_highlight(char_u *line, int forceit, int init) { color = cterm_normal_bg_color - 1; else { EMSG(_("E420: BG color unknown")); - error = TRUE; + error = true; break; } } else { - static char *(color_names[28]) = { + static const char *color_names[] = { "Black", "DarkBlue", "DarkGreen", "DarkCyan", "DarkRed", "DarkMagenta", "Brown", "DarkYellow", "Gray", "Grey", "LightGray", "LightGrey", "DarkGray", "DarkGrey", "Blue", "LightBlue", "Green", "LightGreen", "Cyan", "LightCyan", "Red", "LightRed", "Magenta", - "LightMagenta", "Yellow", "LightYellow", "White", "NONE" + "LightMagenta", "Yellow", "LightYellow", "White", + "NONE" + }; + static const int color_numbers_16[] = { + 0, 1, 2, 3, + 4, 5, 6, 6, + 7, 7, + 7, 7, 8, 8, + 9, 9, 10, 10, + 11, 11, 12, 12, 13, + 13, 14, 14, 15, + -1 + }; + // For xterm with 88 colors: + static int color_numbers_88[] = { + 0, 4, 2, 6, + 1, 5, 32, 72, + 84, 84, + 7, 7, 82, 82, + 12, 43, 10, 61, + 14, 63, 9, 74, 13, + 75, 11, 78, 15, + -1 }; - static int color_numbers_16[28] = {0, 1, 2, 3, - 4, 5, 6, 6, - 7, 7, - 7, 7, 8, 8, - 9, 9, 10, 10, - 11, 11, 12, 12, 13, - 13, 14, 14, 15, -1}; - /* for xterm with 88 colors... */ - static int color_numbers_88[28] = {0, 4, 2, 6, - 1, 5, 32, 72, - 84, 84, - 7, 7, 82, 82, - 12, 43, 10, 61, - 14, 63, 9, 74, 13, - 75, 11, 78, 15, -1}; - /* for xterm with 256 colors... */ - static int color_numbers_256[28] = {0, 4, 2, 6, - 1, 5, 130, 130, - 248, 248, - 7, 7, 242, 242, - 12, 81, 10, 121, - 14, 159, 9, 224, 13, - 225, 11, 229, 15, -1}; - /* for terminals with less than 16 colors... */ - static int color_numbers_8[28] = {0, 4, 2, 6, - 1, 5, 3, 3, - 7, 7, - 7, 7, 0+8, 0+8, - 4+8, 4+8, 2+8, 2+8, - 6+8, 6+8, 1+8, 1+8, 5+8, - 5+8, 3+8, 3+8, 7+8, -1}; - - /* reduce calls to STRICMP a bit, it can be slow */ + // For xterm with 256 colors: + static int color_numbers_256[] = { + 0, 4, 2, 6, + 1, 5, 130, 130, + 248, 248, + 7, 7, 242, 242, + 12, 81, 10, 121, + 14, 159, 9, 224, 13, + 225, 11, 229, 15, + -1 + }; + // For terminals with less than 16 colors: + static int color_numbers_8[28] = { + 0, 4, 2, 6, + 1, 5, 3, 3, + 7, 7, + 7, 7, 0+8, 0+8, + 4+8, 4+8, 2+8, 2+8, + 6+8, 6+8, 1+8, 1+8, 5+8, + 5+8, 3+8, 3+8, 7+8, + -1 + }; + + // Reduce calls to STRICMP a bit, it can be slow. off = TOUPPER_ASC(*arg); - for (i = ARRAY_SIZE(color_names); --i >= 0; ) + for (i = ARRAY_SIZE(color_names); --i >= 0; ) { if (off == color_names[i][0] - && STRICMP(arg + 1, color_names[i] + 1) == 0) + && STRICMP(arg + 1, color_names[i] + 1) == 0) { break; + } + } if (i < 0) { - EMSG2(_( - "E421: Color name or number not recognized: %s"), - key_start); - error = TRUE; + emsgf(_("E421: Color name or number not recognized: %s"), + key_start); + error = true; break; } - /* Use the _16 table to check if its a valid color name. */ + // Use the _16 table to check if its a valid color name. color = color_numbers_16[i]; if (color >= 0) { if (t_colors == 8) { - /* t_Co is 8: use the 8 colors table */ + // t_Co is 8: use the 8 colors table. color = color_numbers_8[i]; if (key[5] == 'F') { /* set/reset bold attribute to get light foreground @@ -6533,16 +6747,14 @@ do_highlight(char_u *line, int forceit, int init) { } } } - /* Add one to the argument, to avoid zero. Zero is used for - * "NONE", then "color" is -1. */ + // Add one to the argument, to avoid zero. Zero is used for + // "NONE", then "color" is -1. if (key[5] == 'F') { HL_TABLE()[idx].sg_cterm_fg = color + 1; if (is_normal_group) { cterm_normal_fg_color = color + 1; cterm_normal_fg_bold = (HL_TABLE()[idx].sg_cterm & HL_BOLD); - { - must_redraw = CLEAR; - } + must_redraw = CLEAR; } } else { HL_TABLE()[idx].sg_cterm_bg = color + 1; @@ -6566,15 +6778,15 @@ do_highlight(char_u *line, int forceit, int init) { } } } - } else if (STRCMP(key, "GUIFG") == 0) { + } else if (strcmp(key, "GUIFG") == 0) { if (!init || !(HL_TABLE()[idx].sg_set & SG_GUI)) { if (!init) HL_TABLE()[idx].sg_set |= SG_GUI; xfree(HL_TABLE()[idx].sg_rgb_fg_name); - if (STRCMP(arg, "NONE")) { - HL_TABLE()[idx].sg_rgb_fg_name = (uint8_t *)xstrdup((char *)arg); - HL_TABLE()[idx].sg_rgb_fg = name_to_color(arg); + if (strcmp(arg, "NONE")) { + HL_TABLE()[idx].sg_rgb_fg_name = (char_u *)xstrdup((char *)arg); + HL_TABLE()[idx].sg_rgb_fg = name_to_color((const char_u *)arg); } else { HL_TABLE()[idx].sg_rgb_fg_name = NULL; HL_TABLE()[idx].sg_rgb_fg = -1; @@ -6591,8 +6803,8 @@ do_highlight(char_u *line, int forceit, int init) { xfree(HL_TABLE()[idx].sg_rgb_bg_name); if (STRCMP(arg, "NONE") != 0) { - HL_TABLE()[idx].sg_rgb_bg_name = (uint8_t *)xstrdup((char *)arg); - HL_TABLE()[idx].sg_rgb_bg = name_to_color(arg); + HL_TABLE()[idx].sg_rgb_bg_name = (char_u *)xstrdup((char *)arg); + HL_TABLE()[idx].sg_rgb_bg = name_to_color((const char_u *)arg); } else { HL_TABLE()[idx].sg_rgb_bg_name = NULL; HL_TABLE()[idx].sg_rgb_bg = -1; @@ -6602,15 +6814,15 @@ do_highlight(char_u *line, int forceit, int init) { if (is_normal_group) { normal_bg = HL_TABLE()[idx].sg_rgb_bg; } - } else if (STRCMP(key, "GUISP") == 0) { + } else if (strcmp(key, "GUISP") == 0) { if (!init || !(HL_TABLE()[idx].sg_set & SG_GUI)) { if (!init) HL_TABLE()[idx].sg_set |= SG_GUI; xfree(HL_TABLE()[idx].sg_rgb_sp_name); - if (STRCMP(arg, "NONE") != 0) { - HL_TABLE()[idx].sg_rgb_sp_name = (uint8_t *)xstrdup((char *)arg); - HL_TABLE()[idx].sg_rgb_sp = name_to_color(arg); + if (strcmp(arg, "NONE") != 0) { + HL_TABLE()[idx].sg_rgb_sp_name = (char_u *)xstrdup((char *)arg); + HL_TABLE()[idx].sg_rgb_sp = name_to_color((const char_u *)arg); } else { HL_TABLE()[idx].sg_rgb_sp_name = NULL; HL_TABLE()[idx].sg_rgb_sp = -1; @@ -6620,31 +6832,25 @@ do_highlight(char_u *line, int forceit, int init) { if (is_normal_group) { normal_sp = HL_TABLE()[idx].sg_rgb_sp; } - } else if (STRCMP(key, "START") == 0 || STRCMP(key, "STOP") == 0) { + } else if (strcmp(key, "START") == 0 || strcmp(key, "STOP") == 0) { // Ignored for now } else { - EMSG2(_("E423: Illegal argument: %s"), key_start); - error = TRUE; + emsgf(_("E423: Illegal argument: %s"), key_start); + error = true; break; } - /* - * When highlighting has been given for a group, don't link it. - */ + // When highlighting has been given for a group, don't link it. if (!init || !(HL_TABLE()[idx].sg_set & SG_LINK)) { HL_TABLE()[idx].sg_link = 0; } - /* - * Continue with next argument. - */ - linep = skipwhite(linep); + // Continue with next argument. + linep = (const char *)skipwhite((const char_u *)linep); } } - /* - * If there is an error, and it's a new entry, remove it from the table. - */ + // If there is an error, and it's a new entry, remove it from the table. if (error && idx == highlight_ga.ga_len) { syn_unadd_group(); } else { @@ -7202,7 +7408,7 @@ char_u *syn_id2name(int id) /* * Like syn_name2id(), but take a pointer + length argument. */ -int syn_namen2id(char_u *linep, int len) +int syn_namen2id(const char_u *linep, int len) { char_u *name = vim_strnsave(linep, len); int id = syn_name2id(name); @@ -7218,7 +7424,7 @@ int syn_namen2id(char_u *linep, int len) /// @param len length of \p pp /// /// @return 0 for failure else the id of the group -int syn_check_group(char_u *pp, int len) +int syn_check_group(const char_u *pp, int len) { char_u *name = vim_strnsave(pp, len); int id = syn_name2id(name); @@ -8221,7 +8427,7 @@ color_name_table_T color_name_table[] = { /// /// @param[in] name string value to convert to RGB /// return the hex value or -1 if could not find a correct value -RgbValue name_to_color(const uint8_t *name) +RgbValue name_to_color(const char_u *name) { if (name[0] == '#' && isxdigit(name[1]) && isxdigit(name[2]) diff --git a/src/nvim/syntax.h b/src/nvim/syntax.h index bb733ead30..c9b0665ec8 100644 --- a/src/nvim/syntax.h +++ b/src/nvim/syntax.h @@ -45,6 +45,9 @@ typedef struct { } color_name_table_T; extern color_name_table_T color_name_table[]; +/// Array of highlight definitions, used for unit testing +extern const char *const highlight_init_cmdline[]; + #ifdef INCLUDE_GENERATED_DECLARATIONS # include "syntax.h.generated.h" #endif diff --git a/src/nvim/version.c b/src/nvim/version.c index 8d85855c6e..1a9c9d73e5 100644 --- a/src/nvim/version.c +++ b/src/nvim/version.c @@ -30,8 +30,8 @@ // for ":version", ":intro", and "nvim --version" #ifndef NVIM_VERSION_MEDIUM #define NVIM_VERSION_MEDIUM "v" STR(NVIM_VERSION_MAJOR)\ - "." STR(NVIM_VERSION_MINOR) "." STR(NVIM_VERSION_PATCH)\ - NVIM_VERSION_PRERELEASE +"." STR(NVIM_VERSION_MINOR) "." STR(NVIM_VERSION_PATCH)\ +NVIM_VERSION_PRERELEASE #endif #define NVIM_VERSION_LONG "NVIM " NVIM_VERSION_MEDIUM @@ -47,33 +47,33 @@ char *version_cflags = "Compilation: " NVIM_VERSION_CFLAGS; static char *features[] = { #ifdef HAVE_ACL - "+acl", +"+acl", #else - "-acl", +"-acl", #endif #if (defined(HAVE_ICONV_H) && defined(USE_ICONV)) || defined(DYNAMIC_ICONV) # ifdef DYNAMIC_ICONV - "+iconv/dyn", +"+iconv/dyn", # else - "+iconv", +"+iconv", # endif #else - "-iconv", +"-iconv", #endif #ifdef HAVE_JEMALLOC - "+jemalloc", +"+jemalloc", #else - "-jemalloc", +"-jemalloc", #endif #ifdef FEAT_TUI - "+tui", +"+tui", #else - "-tui", +"-tui", #endif - NULL +NULL }; // clang-format off @@ -205,8 +205,8 @@ static const int included_patches[] = { // 1233, // 1232, // 1231, - // 1230, - // 1229, + 1230, + 1229, // 1228, // 1227, // 1226, diff --git a/src/nvim/vim.h b/src/nvim/vim.h index 7da3c8246f..b535e380d1 100644 --- a/src/nvim/vim.h +++ b/src/nvim/vim.h @@ -26,13 +26,6 @@ /// length of a buffer to store a number in ASCII (64 bits binary + NUL) enum { NUMBUFLEN = 65 }; -// flags for vim_str2nr() -#define STR2NR_BIN 1 -#define STR2NR_OCT 2 -#define STR2NR_HEX 4 -#define STR2NR_ALL (STR2NR_BIN + STR2NR_OCT + STR2NR_HEX) -#define STR2NR_FORCE 8 // only when ONE of the above is used - #define MAX_TYPENR 65535 #define ROOT_UID 0 diff --git a/src/nvim/viml/parser/expressions.c b/src/nvim/viml/parser/expressions.c new file mode 100644 index 0000000000..4196ecb9d2 --- /dev/null +++ b/src/nvim/viml/parser/expressions.c @@ -0,0 +1,3102 @@ +// This is an open source non-commercial project. Dear PVS-Studio, please check +// it. PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com + +/// VimL expression parser + +// Planned incompatibilities (to be included into vim_diff.txt when this parser +// will be an actual part of VimL evaluation process): +// +// 1. Expressions are first fully parsed and only then executed. This means +// that while ":echo [system('touch abc')" will create file "abc" in Vim and +// only then raise syntax error regarding missing comma in list in Neovim +// trying to execute that will immediately raise syntax error regarding +// missing list end without actually executing anything. +// 2. Expressions are first fully parsed, without considering any runtime +// information. This means things like that "d.a" does not change its +// meaning depending on type of "d" (or whether Vim is currently executing or +// skipping). For compatibility reasons the dot thus may either be “concat +// or subscript” operator or just “concat” operator. +// 3. Expressions parser is aware whether it is called for :echo or <C-r>=. +// This means that while "<C-r>=1 | 2<CR>" is equivalent to "<C-r>=1<CR>" +// because "| 2" part is left to be treated as a command separator and then +// ignored in Neovim it is an error. +// 4. Expressions parser has generally better error reporting. But for +// compatibility reasons most errors have error code E15 while error messages +// are significantly different from Vim’s E15. Also some error codes were +// retired because of being harder to emulate or because of them being +// a result of differences in parsing process: e.g. with ":echo {a, b}" Vim +// will attempt to parse expression as lambda, fail, check whether it is +// a curly-braces-name, fail again, and evaluate that as a dictionary, giving +// error regarding undefined variable "a" (or about missing colon). Neovim +// will not try to evaluate anything here: comma right after an argument name +// means that expression may not be anything, but lambda, so the resulting +// error message will never be about missing variable or colon: it will be +// about missing arrow (or a continuation of argument list). +// 5. Failing to parse expression always gives exactly one error message: no +// more stack of error messages like > +// +// :echo [1, +// E697: Missing end of List ']': +// E15: Invalid expression: [1, +// +// < , just exactly one E697 message. +// 6. Some expressions involving calling parenthesis which are treated +// separately by Vim even when not separated by spaces are treated as one +// expression by Neovim: e.g. ":echo (1)(1)" will yield runtime error after +// failing to call "1", while Vim will echo "1 1". Reasoning is the same: +// type of what is in the first expression is generally not known when +// parsing, so to have separate expressions like this separate them with +// spaces. +// 7. 'isident' no longer applies to environment variables, they always include +// ASCII alphanumeric characters and underscore and nothing except this. + +#include <stdbool.h> +#include <stddef.h> +#include <assert.h> +#include <string.h> + +#include "nvim/vim.h" +#include "nvim/memory.h" +#include "nvim/types.h" +#include "nvim/charset.h" +#include "nvim/ascii.h" +#include "nvim/assert.h" +#include "nvim/lib/kvec.h" +#include "nvim/eval/typval.h" + +#include "nvim/viml/parser/expressions.h" +#include "nvim/viml/parser/parser.h" + +#define vim_str2nr(s, ...) vim_str2nr((const char_u *)(s), __VA_ARGS__) + +typedef kvec_withinit_t(ExprASTNode **, 16) ExprASTStack; + +/// Which nodes may be wanted +typedef enum { + /// Operators: function call, subscripts, binary operators, … + /// + /// For unrestricted expressions. + kENodeOperator, + /// Values: literals, variables, nested expressions, unary operators. + /// + /// For unrestricted expressions as well, implies that top item in AST stack + /// points to NULL. + kENodeValue, +} ExprASTWantedNode; + +/// Parse type: what is being parsed currently +typedef enum { + /// Parsing regular VimL expression + kEPTExpr = 0, + /// Parsing lambda arguments + /// + /// Just like parsing function arguments, but it is valid to be ended with an + /// arrow only. + kEPTLambdaArguments, + /// Assignment: parsing for :let + kEPTAssignment, + /// Single assignment: used when lists are not allowed (i.e. when nesting) + kEPTSingleAssignment, +} ExprASTParseType; + +typedef kvec_withinit_t(ExprASTParseType, 4) ExprASTParseTypeStack; + +/// Operator priority level +typedef enum { + kEOpLvlInvalid = 0, + kEOpLvlComplexIdentifier, + kEOpLvlParens, + kEOpLvlAssignment, + kEOpLvlArrow, + kEOpLvlComma, + kEOpLvlColon, + kEOpLvlTernaryValue, + kEOpLvlTernary, + kEOpLvlOr, + kEOpLvlAnd, + kEOpLvlComparison, + kEOpLvlAddition, ///< Addition, subtraction and concatenation. + kEOpLvlMultiplication, ///< Multiplication, division and modulo. + kEOpLvlUnary, ///< Unary operations: not, minus, plus. + kEOpLvlSubscript, ///< Subscripts. + kEOpLvlValue, ///< Values: literals, variables, nested expressions, … +} ExprOpLvl; + +/// Operator associativity +typedef enum { + kEOpAssNo= 'n', ///< Not associative / not applicable. + kEOpAssLeft = 'l', ///< Left associativity. + kEOpAssRight = 'r', ///< Right associativity. +} ExprOpAssociativity; + +#ifdef INCLUDE_GENERATED_DECLARATIONS +# include "viml/parser/expressions.c.generated.h" +#endif + +/// Character used as a separator in autoload function/variable names. +#define AUTOLOAD_CHAR '#' + +/// Scale number by a given factor +/// +/// Used to apply exponent to a number. Idea taken from uClibc. +/// +/// @param[in] num Number to scale. Does not bother doing anything if it is +/// zero. +/// @param[in] base Base, should be 10 since non-decimal floating-point +/// numbers are not supported. +/// @param[in] exponent Exponent to scale by. +/// @param[in] exponent_negative True if exponent is negative. +static inline float_T scale_number(const float_T num, + const uint8_t base, + const uvarnumber_T exponent, + const bool exponent_negative) + FUNC_ATTR_ALWAYS_INLINE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_CONST +{ + if (num == 0 || exponent == 0) { + return num; + } + assert(base); + uvarnumber_T exp = exponent; + float_T p_base = (float_T)base; + float_T ret = num; + while (exp) { + if (exp & 1) { + if (exponent_negative) { + ret /= p_base; + } else { + ret *= p_base; + } + } + exp >>= 1; + p_base *= p_base; + } + return ret; +} + +/// Get next token for the VimL expression input +/// +/// @param pstate Parser state. +/// @param[in] flags Flags, @see LexExprFlags. +/// +/// @return Next token. +LexExprToken viml_pexpr_next_token(ParserState *const pstate, const int flags) + FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL +{ + LexExprToken ret = { + .type = kExprLexInvalid, + .start = pstate->pos, + }; + ParserLine pline; + if (!viml_parser_get_remaining_line(pstate, &pline)) { + ret.type = kExprLexEOC; + return ret; + } + if (pline.size <= 0) { + ret.len = 0; + ret.type = kExprLexEOC; + goto viml_pexpr_next_token_adv_return; + } + ret.len = 1; + const uint8_t schar = (uint8_t)pline.data[0]; +#define GET_CCS(ret, pline) \ + do { \ + if (ret.len < pline.size \ + && strchr("?#", pline.data[ret.len]) != NULL) { \ + ret.data.cmp.ccs = \ + (ExprCaseCompareStrategy)pline.data[ret.len]; \ + ret.len++; \ + } else { \ + ret.data.cmp.ccs = kCCStrategyUseOption; \ + } \ + } while (0) + switch (schar) { + // Paired brackets. +#define BRACKET(typ, opning, clsing) \ + case opning: \ + case clsing: { \ + ret.type = typ; \ + ret.data.brc.closing = (schar == clsing); \ + break; \ + } + BRACKET(kExprLexParenthesis, '(', ')') + BRACKET(kExprLexBracket, '[', ']') + BRACKET(kExprLexFigureBrace, '{', '}') +#undef BRACKET + + // Single character tokens without data. +#define CHAR(typ, ch) \ + case ch: { \ + ret.type = typ; \ + break; \ + } + CHAR(kExprLexQuestion, '?') + CHAR(kExprLexColon, ':') + CHAR(kExprLexComma, ',') +#undef CHAR + + // Multiplication/division/modulo. +#define MUL(mul_type, ch) \ + case ch: { \ + ret.type = kExprLexMultiplication; \ + ret.data.mul.type = mul_type; \ + break; \ + } + MUL(kExprLexMulMul, '*') + MUL(kExprLexMulDiv, '/') + MUL(kExprLexMulMod, '%') +#undef MUL + +#define CHARREG(typ, cond) \ + do { \ + ret.type = typ; \ + for (; (ret.len < pline.size \ + && cond(pline.data[ret.len])) \ + ; ret.len++) { \ + } \ + } while (0) + + // Whitespace. + case ' ': + case TAB: { + CHARREG(kExprLexSpacing, ascii_iswhite); + break; + } + + // Control character, except for NUL, NL and TAB. + case Ctrl_A: case Ctrl_B: case Ctrl_C: case Ctrl_D: case Ctrl_E: + case Ctrl_F: case Ctrl_G: case Ctrl_H: + + case Ctrl_K: case Ctrl_L: case Ctrl_M: case Ctrl_N: case Ctrl_O: + case Ctrl_P: case Ctrl_Q: case Ctrl_R: case Ctrl_S: case Ctrl_T: + case Ctrl_U: case Ctrl_V: case Ctrl_W: case Ctrl_X: case Ctrl_Y: + case Ctrl_Z: { +#define ISCTRL(schar) (schar < ' ') + CHARREG(kExprLexInvalid, ISCTRL); + ret.data.err.type = kExprLexSpacing; + ret.data.err.msg = + _("E15: Invalid control character present in input: %.*s"); + break; +#undef ISCTRL + } + + // Number. + case '0': case '1': case '2': case '3': case '4': case '5': case '6': + case '7': case '8': case '9': { + ret.data.num.is_float = false; + ret.data.num.base = 10; + size_t frac_start = 0; + size_t exp_start = 0; + size_t frac_end = 0; + bool exp_negative = false; + CHARREG(kExprLexNumber, ascii_isdigit); + if (flags & kELFlagAllowFloat) { + const LexExprToken non_float_ret = ret; + if (pline.size > ret.len + 1 + && pline.data[ret.len] == '.' + && ascii_isdigit(pline.data[ret.len + 1])) { + ret.len++; + frac_start = ret.len; + frac_end = ret.len; + ret.data.num.is_float = true; + for (; ret.len < pline.size && ascii_isdigit(pline.data[ret.len]) + ; ret.len++) { + // A small optimization: trailing zeroes in fractional part do not + // add anything to significand, so it is useless to include them in + // frac_end. + if (pline.data[ret.len] != '0') { + frac_end = ret.len + 1; + } + } + if (pline.size > ret.len + 1 + && (pline.data[ret.len] == 'e' + || pline.data[ret.len] == 'E') + && ((pline.size > ret.len + 2 + && (pline.data[ret.len + 1] == '+' + || pline.data[ret.len + 1] == '-') + && ascii_isdigit(pline.data[ret.len + 2])) + || ascii_isdigit(pline.data[ret.len + 1]))) { + ret.len++; + if (pline.data[ret.len] == '+' + || (exp_negative = (pline.data[ret.len] == '-'))) { + ret.len++; + } + exp_start = ret.len; + CHARREG(kExprLexNumber, ascii_isdigit); + } + } + if (pline.size > ret.len + && (pline.data[ret.len] == '.' + || ASCII_ISALPHA(pline.data[ret.len]))) { + ret = non_float_ret; + } + } + // TODO(ZyX-I): detect overflows + if (ret.data.num.is_float) { + // Vim used to use string2float here which in turn uses strtod(). There + // are two problems with this approach: + // 1. strtod() is locale-dependent. Not sure how it is worked around so + // that I do not see relevant bugs, but it still does not look like + // a good idea. + // 2. strtod() does not accept length argument. + // + // The below variant of parsing floats was recognized as acceptable + // because it is basically how uClibc does the thing: it generates + // a number ignoring decimal point (but recording its position), then + // uses recorded position to scale number down when processing exponent. + float_T significand_part = 0; + uvarnumber_T exp_part = 0; + const size_t frac_size = (size_t)(frac_end - frac_start); + for (size_t i = 0; i < frac_end; i++) { + if (i == frac_start - 1) { + continue; + } + significand_part = significand_part * 10 + (pline.data[i] - '0'); + } + if (exp_start) { + vim_str2nr(pline.data + exp_start, NULL, NULL, 0, NULL, &exp_part, + (int)(ret.len - exp_start)); + } + if (exp_negative) { + exp_part += frac_size; + } else { + if (exp_part < frac_size) { + exp_negative = true; + exp_part = frac_size - exp_part; + } else { + exp_part -= frac_size; + } + } + ret.data.num.val.floating = scale_number(significand_part, 10, exp_part, + exp_negative); + } else { + int len; + int prep; + vim_str2nr(pline.data, &prep, &len, STR2NR_ALL, NULL, + &ret.data.num.val.integer, (int)pline.size); + ret.len = (size_t)len; + const uint8_t bases[] = { + [0] = 10, + ['0'] = 8, + ['x'] = 16, ['X'] = 16, + ['b'] = 2, ['B'] = 2, + }; + ret.data.num.base = bases[prep]; + } + break; + } + +#define ISWORD_OR_AUTOLOAD(x) \ + (ascii_isident(x) || (x) == AUTOLOAD_CHAR) + + // Environment variable. + case '$': { + CHARREG(kExprLexEnv, ascii_isident); + break; + } + + // Normal variable/function name. + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': + case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': + case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': + case 'v': case 'w': case 'x': case 'y': case 'z': + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': + case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': + case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': + case 'V': case 'W': case 'X': case 'Y': case 'Z': + case '_': { + ret.data.var.scope = 0; + ret.data.var.autoload = false; + CHARREG(kExprLexPlainIdentifier, ascii_isident); + // "is" and "isnot" operators. + if (!(flags & kELFlagIsNotCmp) + && ((ret.len == 2 && memcmp(pline.data, "is", 2) == 0) + || (ret.len == 5 && memcmp(pline.data, "isnot", 5) == 0))) { + ret.type = kExprLexComparison; + ret.data.cmp.type = kExprCmpIdentical; + ret.data.cmp.inv = (ret.len == 5); + GET_CCS(ret, pline); + // Scope: `s:`, etc. + } else if (ret.len == 1 + && pline.size > 1 + && memchr(EXPR_VAR_SCOPE_LIST, schar, + sizeof(EXPR_VAR_SCOPE_LIST)) != NULL + && pline.data[ret.len] == ':' + && !(flags & kELFlagForbidScope)) { + ret.len++; + ret.data.var.scope = (ExprVarScope)schar; + CHARREG(kExprLexPlainIdentifier, ISWORD_OR_AUTOLOAD); + ret.data.var.autoload = ( + memchr(pline.data + 2, AUTOLOAD_CHAR, ret.len - 2) + != NULL); + // Previous CHARREG stopped at autoload character in order to make it + // possible to detect `is#`. Continue now with autoload characters + // included. + // + // Warning: there is ambiguity for the lexer: `is#Foo(1)` is a call of + // function `is#Foo()`, `1is#Foo(1)` is a comparison `1 is# Foo(1)`. This + // needs to be resolved on the higher level where context is available. + } else if (pline.size > ret.len + && pline.data[ret.len] == AUTOLOAD_CHAR) { + ret.data.var.autoload = true; + CHARREG(kExprLexPlainIdentifier, ISWORD_OR_AUTOLOAD); + } + break; + } + +#undef ISWORD_OR_AUTOLOAD +#undef CHARREG + + // Option. + case '&': { +#define OPTNAMEMISS(ret) \ + do { \ + ret.type = kExprLexInvalid; \ + ret.data.err.type = kExprLexOption; \ + ret.data.err.msg = _("E112: Option name missing: %.*s"); \ + } while (0) + if (pline.size > 1 && pline.data[1] == '&') { + ret.type = kExprLexAnd; + ret.len++; + break; + } + if (pline.size == 1 || !ASCII_ISALPHA(pline.data[1])) { + OPTNAMEMISS(ret); + break; + } + ret.type = kExprLexOption; + if (pline.size > 2 + && pline.data[2] == ':' + && memchr(EXPR_OPT_SCOPE_LIST, pline.data[1], + sizeof(EXPR_OPT_SCOPE_LIST)) != NULL) { + ret.len += 2; + ret.data.opt.scope = (ExprOptScope)pline.data[1]; + ret.data.opt.name = pline.data + 3; + } else { + ret.data.opt.scope = kExprOptScopeUnspecified; + ret.data.opt.name = pline.data + 1; + } + const char *p = ret.data.opt.name; + const char *const e = pline.data + pline.size; + if (e - p >= 4 && p[0] == 't' && p[1] == '_') { + ret.data.opt.len = 4; + ret.len += 4; + } else { + for (; p < e && ASCII_ISALPHA(*p); p++) { + } + ret.data.opt.len = (size_t)(p - ret.data.opt.name); + if (ret.data.opt.len == 0) { + OPTNAMEMISS(ret); + } else { + ret.len += ret.data.opt.len; + } + } + break; +#undef OPTNAMEMISS + } + + // Register. + case '@': { + ret.type = kExprLexRegister; + if (pline.size > 1) { + ret.len++; + ret.data.reg.name = (uint8_t)pline.data[1]; + } else { + ret.data.reg.name = -1; + } + break; + } + + // Single quoted string. + case '\'': { + ret.type = kExprLexSingleQuotedString; + ret.data.str.closed = false; + for (; ret.len < pline.size && !ret.data.str.closed; ret.len++) { + if (pline.data[ret.len] == '\'') { + if (ret.len + 1 < pline.size && pline.data[ret.len + 1] == '\'') { + ret.len++; + } else { + ret.data.str.closed = true; + } + } + } + break; + } + + // Double quoted string. + case '"': { + ret.type = kExprLexDoubleQuotedString; + ret.data.str.closed = false; + for (; ret.len < pline.size && !ret.data.str.closed; ret.len++) { + if (pline.data[ret.len] == '\\') { + if (ret.len + 1 < pline.size) { + ret.len++; + } + } else if (pline.data[ret.len] == '"') { + ret.data.str.closed = true; + } + } + break; + } + + // Unary not, (un)equality and regex (not) match comparison operators. + case '!': + case '=': { + if (pline.size == 1) { + ret.type = (schar == '!' ? kExprLexNot : kExprLexAssignment); + ret.data.ass.type = kExprAsgnPlain; + break; + } + ret.type = kExprLexComparison; + ret.data.cmp.inv = (schar == '!'); + if (pline.data[1] == '=') { + ret.data.cmp.type = kExprCmpEqual; + ret.len++; + } else if (pline.data[1] == '~') { + ret.data.cmp.type = kExprCmpMatches; + ret.len++; + } else if (schar == '!') { + ret.type = kExprLexNot; + } else { + ret.type = kExprLexAssignment; + ret.data.ass.type = kExprAsgnPlain; + } + GET_CCS(ret, pline); + break; + } + + // Less/greater [or equal to] comparison operators. + case '>': + case '<': { + ret.type = kExprLexComparison; + const bool haseqsign = (pline.size > 1 && pline.data[1] == '='); + if (haseqsign) { + ret.len++; + } + GET_CCS(ret, pline); + ret.data.cmp.inv = (schar == '<'); + ret.data.cmp.type = ((ret.data.cmp.inv ^ haseqsign) + ? kExprCmpGreaterOrEqual + : kExprCmpGreater); + break; + } + + // Minus sign, arrow from lambdas or augmented assignment. + case '-': { + if (pline.size > 1 && pline.data[1] == '>') { + ret.len++; + ret.type = kExprLexArrow; + } else if (pline.size > 1 && pline.data[1] == '=') { + ret.len++; + ret.type = kExprLexAssignment; + ret.data.ass.type = kExprAsgnSubtract; + } else { + ret.type = kExprLexMinus; + } + break; + } + + // Sign or augmented assignment. +#define CHAR_OR_ASSIGN(ch, ch_type, ass_type) \ + case ch: { \ + if (pline.size > 1 && pline.data[1] == '=') { \ + ret.len++; \ + ret.type = kExprLexAssignment; \ + ret.data.ass.type = ass_type; \ + } else { \ + ret.type = ch_type; \ + } \ + break; \ + } + CHAR_OR_ASSIGN('+', kExprLexPlus, kExprAsgnAdd) + CHAR_OR_ASSIGN('.', kExprLexDot, kExprAsgnConcat) +#undef CHAR_OR_ASSIGN + + // Expression end because Ex command ended. + case NUL: + case NL: { + if (flags & kELFlagForbidEOC) { + ret.type = kExprLexInvalid; + ret.data.err.msg = _("E15: Unexpected EOC character: %.*s"); + ret.data.err.type = kExprLexSpacing; + } else { + ret.type = kExprLexEOC; + } + break; + } + + case '|': { + if (pline.size >= 2 && pline.data[ret.len] == '|') { + // "||" is or. + ret.len++; + ret.type = kExprLexOr; + } else if (flags & kELFlagForbidEOC) { + // Note: `<C-r>=1 | 2<CR>` actually yields 1 in Vim without any + // errors. This will be changed here. + ret.type = kExprLexInvalid; + ret.data.err.msg = _("E15: Unexpected EOC character: %.*s"); + ret.data.err.type = kExprLexOr; + } else { + ret.type = kExprLexEOC; + } + break; + } + + // Everything else is not valid. + default: { + ret.len = (size_t)utfc_ptr2len_len((const char_u *)pline.data, + (int)pline.size); + ret.type = kExprLexInvalid; + ret.data.err.type = kExprLexPlainIdentifier; + ret.data.err.msg = _("E15: Unidentified character: %.*s"); + break; + } + } +#undef GET_CCS +viml_pexpr_next_token_adv_return: + if (!(flags & kELFlagPeek)) { + viml_parser_advance(pstate, ret.len); + } + return ret; +} + +static const char *const eltkn_type_tab[] = { + [kExprLexInvalid] = "Invalid", + [kExprLexMissing] = "Missing", + [kExprLexSpacing] = "Spacing", + [kExprLexEOC] = "EOC", + + [kExprLexQuestion] = "Question", + [kExprLexColon] = "Colon", + [kExprLexOr] = "Or", + [kExprLexAnd] = "And", + [kExprLexComparison] = "Comparison", + [kExprLexPlus] = "Plus", + [kExprLexMinus] = "Minus", + [kExprLexDot] = "Dot", + [kExprLexMultiplication] = "Multiplication", + + [kExprLexNot] = "Not", + + [kExprLexNumber] = "Number", + [kExprLexSingleQuotedString] = "SingleQuotedString", + [kExprLexDoubleQuotedString] = "DoubleQuotedString", + [kExprLexOption] = "Option", + [kExprLexRegister] = "Register", + [kExprLexEnv] = "Env", + [kExprLexPlainIdentifier] = "PlainIdentifier", + + [kExprLexBracket] = "Bracket", + [kExprLexFigureBrace] = "FigureBrace", + [kExprLexParenthesis] = "Parenthesis", + [kExprLexComma] = "Comma", + [kExprLexArrow] = "Arrow", + [kExprLexAssignment] = "Assignment", +}; + +const char *const eltkn_cmp_type_tab[] = { + [kExprCmpEqual] = "Equal", + [kExprCmpMatches] = "Matches", + [kExprCmpGreater] = "Greater", + [kExprCmpGreaterOrEqual] = "GreaterOrEqual", + [kExprCmpIdentical] = "Identical", +}; + +const char *const expr_asgn_type_tab[] = { + [kExprAsgnPlain] = "Plain", + [kExprAsgnAdd] = "Add", + [kExprAsgnSubtract] = "Subtract", + [kExprAsgnConcat] = "Concat", +}; + +const char *const ccs_tab[] = { + [kCCStrategyUseOption] = "UseOption", + [kCCStrategyMatchCase] = "MatchCase", + [kCCStrategyIgnoreCase] = "IgnoreCase", +}; + +static const char *const eltkn_mul_type_tab[] = { + [kExprLexMulMul] = "Mul", + [kExprLexMulDiv] = "Div", + [kExprLexMulMod] = "Mod", +}; + +static const char *const eltkn_opt_scope_tab[] = { + [kExprOptScopeUnspecified] = "Unspecified", + [kExprOptScopeGlobal] = "Global", + [kExprOptScopeLocal] = "Local", +}; + +/// Represent token as a string +/// +/// Intended for testing and debugging purposes. +/// +/// @param[in] pstate Parser state, needed to get token string from it. May be +/// NULL, in which case in place of obtaining part of the +/// string represented by token only token length is +/// returned. +/// @param[in] token Token to represent. +/// @param[out] ret_size Return string size, for cases like NULs inside +/// a string. May be NULL. +/// +/// @return Token represented in a string form, in a static buffer (overwritten +/// on each call). +const char *viml_pexpr_repr_token(const ParserState *const pstate, + const LexExprToken token, + size_t *const ret_size) + FUNC_ATTR_WARN_UNUSED_RESULT +{ + static char ret[1024]; + char *p = ret; + const char *const e = &ret[1024] - 1; +#define ADDSTR(...) \ + do { \ + p += snprintf(p, (size_t)(sizeof(ret) - (size_t)(p - ret)), __VA_ARGS__); \ + if (p >= e) { \ + goto viml_pexpr_repr_token_end; \ + } \ + } while (0) + ADDSTR("%zu:%zu:%s", token.start.line, token.start.col, + eltkn_type_tab[token.type]); + switch (token.type) { +#define TKNARGS(tkn_type, ...) \ + case tkn_type: { \ + ADDSTR(__VA_ARGS__); \ + break; \ + } + TKNARGS(kExprLexComparison, "(type=%s,ccs=%s,inv=%i)", + eltkn_cmp_type_tab[token.data.cmp.type], + ccs_tab[token.data.cmp.ccs], + (int)token.data.cmp.inv) + TKNARGS(kExprLexMultiplication, "(type=%s)", + eltkn_mul_type_tab[token.data.mul.type]) + TKNARGS(kExprLexAssignment, "(type=%s)", + expr_asgn_type_tab[token.data.ass.type]) + TKNARGS(kExprLexRegister, "(name=%s)", intchar2str(token.data.reg.name)) + case kExprLexDoubleQuotedString: + TKNARGS(kExprLexSingleQuotedString, "(closed=%i)", + (int)token.data.str.closed) + TKNARGS(kExprLexOption, "(scope=%s,name=%.*s)", + eltkn_opt_scope_tab[token.data.opt.scope], + (int)token.data.opt.len, token.data.opt.name) + TKNARGS(kExprLexPlainIdentifier, "(scope=%s,autoload=%i)", + intchar2str(token.data.var.scope), (int)token.data.var.autoload) + TKNARGS(kExprLexNumber, "(is_float=%i,base=%i,val=%lg)", + (int)token.data.num.is_float, + (int)token.data.num.base, + (double)(token.data.num.is_float + ? (double)token.data.num.val.floating + : (double)token.data.num.val.integer)) + TKNARGS(kExprLexInvalid, "(msg=%s)", token.data.err.msg) + default: { + // No additional arguments. + break; + } +#undef TKNARGS + } + if (pstate == NULL) { + ADDSTR("::%zu", token.len); + } else { + *p++ = ':'; + memmove( + p, &pstate->reader.lines.items[token.start.line].data[token.start.col], + token.len); + p += token.len; + *p = NUL; + } +#undef ADDSTR +viml_pexpr_repr_token_end: + if (ret_size != NULL) { + *ret_size = (size_t)(p - ret); + } + return ret; +} + +const char *const east_node_type_tab[] = { + [kExprNodeMissing] = "Missing", + [kExprNodeOpMissing] = "OpMissing", + [kExprNodeTernary] = "Ternary", + [kExprNodeTernaryValue] = "TernaryValue", + [kExprNodeRegister] = "Register", + [kExprNodeSubscript] = "Subscript", + [kExprNodeListLiteral] = "ListLiteral", + [kExprNodeUnaryPlus] = "UnaryPlus", + [kExprNodeBinaryPlus] = "BinaryPlus", + [kExprNodeNested] = "Nested", + [kExprNodeCall] = "Call", + [kExprNodePlainIdentifier] = "PlainIdentifier", + [kExprNodePlainKey] = "PlainKey", + [kExprNodeComplexIdentifier] = "ComplexIdentifier", + [kExprNodeUnknownFigure] = "UnknownFigure", + [kExprNodeLambda] = "Lambda", + [kExprNodeDictLiteral] = "DictLiteral", + [kExprNodeCurlyBracesIdentifier] = "CurlyBracesIdentifier", + [kExprNodeComma] = "Comma", + [kExprNodeColon] = "Colon", + [kExprNodeArrow] = "Arrow", + [kExprNodeComparison] = "Comparison", + [kExprNodeConcat] = "Concat", + [kExprNodeConcatOrSubscript] = "ConcatOrSubscript", + [kExprNodeInteger] = "Integer", + [kExprNodeFloat] = "Float", + [kExprNodeSingleQuotedString] = "SingleQuotedString", + [kExprNodeDoubleQuotedString] = "DoubleQuotedString", + [kExprNodeOr] = "Or", + [kExprNodeAnd] = "And", + [kExprNodeUnaryMinus] = "UnaryMinus", + [kExprNodeBinaryMinus] = "BinaryMinus", + [kExprNodeNot] = "Not", + [kExprNodeMultiplication] = "Multiplication", + [kExprNodeDivision] = "Division", + [kExprNodeMod] = "Mod", + [kExprNodeOption] = "Option", + [kExprNodeEnvironment] = "Environment", + [kExprNodeAssignment] = "Assignment", +}; + +/// Represent `int` character as a string +/// +/// Converts +/// - ASCII digits into '{digit}' +/// - ASCII printable characters into a single-character strings +/// - everything else to numbers. +/// +/// @param[in] ch Character to convert. +/// +/// @return Converted string, stored in a static buffer (overriden after each +/// call). +static const char *intchar2str(const int ch) + FUNC_ATTR_WARN_UNUSED_RESULT +{ + static char buf[sizeof(int) * 3 + 1]; + if (' ' <= ch && ch < 0x7f) { + if (ascii_isdigit(ch)) { + buf[0] = '\''; + buf[1] = (char)ch; + buf[2] = '\''; + buf[3] = NUL; + } else { + buf[0] = (char)ch; + buf[1] = NUL; + } + } else { + snprintf(buf, sizeof(buf), "%i", ch); + } + return buf; +} + +#ifdef UNIT_TESTING +#include <stdio.h> + +REAL_FATTR_UNUSED +static inline void viml_pexpr_debug_print_ast_node( + const ExprASTNode *const *const eastnode_p, + const char *const prefix) +{ + if (*eastnode_p == NULL) { + fprintf(stderr, "%s %p : NULL\n", prefix, (void *)eastnode_p); + } else { + fprintf(stderr, "%s %p : %p : %s : %zu:%zu:%zu\n", + prefix, (void *)eastnode_p, (void *)(*eastnode_p), + east_node_type_tab[(*eastnode_p)->type], (*eastnode_p)->start.line, + (*eastnode_p)->start.col, (*eastnode_p)->len); + } +} + +REAL_FATTR_UNUSED +static inline void viml_pexpr_debug_print_ast_stack( + const ExprASTStack *const ast_stack, + const char *const msg) + FUNC_ATTR_NONNULL_ALL FUNC_ATTR_ALWAYS_INLINE +{ + fprintf(stderr, "\n%sstack: %zu:\n", msg, kv_size(*ast_stack)); + for (size_t i = 0; i < kv_size(*ast_stack); i++) { + viml_pexpr_debug_print_ast_node( + (const ExprASTNode *const *)kv_A(*ast_stack, i), + "-"); + } +} + +REAL_FATTR_UNUSED +static inline void viml_pexpr_debug_print_token( + const ParserState *const pstate, const LexExprToken token) + FUNC_ATTR_ALWAYS_INLINE +{ + fprintf(stderr, "\ntkn: %s\n", viml_pexpr_repr_token(pstate, token, NULL)); +} +#define PSTACK(msg) \ + viml_pexpr_debug_print_ast_stack(&ast_stack, #msg) +#define PSTACK_P(msg) \ + viml_pexpr_debug_print_ast_stack(ast_stack, #msg) +#define PNODE_P(eastnode_p, msg) \ + viml_pexpr_debug_print_ast_node((const ExprASTNode *const *)eastnode_p, \ + (#msg)) +#define PTOKEN(tkn) \ + viml_pexpr_debug_print_token(pstate, tkn) +#endif + +const uint8_t node_maxchildren[] = { + [kExprNodeMissing] = 0, + [kExprNodeOpMissing] = 2, + [kExprNodeTernary] = 2, + [kExprNodeTernaryValue] = 2, + [kExprNodeRegister] = 0, + [kExprNodeSubscript] = 2, + [kExprNodeListLiteral] = 1, + [kExprNodeUnaryPlus] = 1, + [kExprNodeBinaryPlus] = 2, + [kExprNodeNested] = 1, + [kExprNodeCall] = 2, + [kExprNodePlainIdentifier] = 0, + [kExprNodePlainKey] = 0, + [kExprNodeComplexIdentifier] = 2, + [kExprNodeUnknownFigure] = 1, + [kExprNodeLambda] = 2, + [kExprNodeDictLiteral] = 1, + [kExprNodeCurlyBracesIdentifier] = 1, + [kExprNodeComma] = 2, + [kExprNodeColon] = 2, + [kExprNodeArrow] = 2, + [kExprNodeComparison] = 2, + [kExprNodeConcat] = 2, + [kExprNodeConcatOrSubscript] = 2, + [kExprNodeInteger] = 0, + [kExprNodeFloat] = 0, + [kExprNodeSingleQuotedString] = 0, + [kExprNodeDoubleQuotedString] = 0, + [kExprNodeOr] = 2, + [kExprNodeAnd] = 2, + [kExprNodeUnaryMinus] = 1, + [kExprNodeBinaryMinus] = 2, + [kExprNodeNot] = 1, + [kExprNodeMultiplication] = 2, + [kExprNodeDivision] = 2, + [kExprNodeMod] = 2, + [kExprNodeOption] = 0, + [kExprNodeEnvironment] = 0, + [kExprNodeAssignment] = 2, +}; + +/// Free memory occupied by AST +/// +/// @param ast AST stack to free. +void viml_pexpr_free_ast(ExprAST ast) +{ + ExprASTStack ast_stack; + kvi_init(ast_stack); + kvi_push(ast_stack, &ast.root); + while (kv_size(ast_stack)) { + ExprASTNode **const cur_node = kv_last(ast_stack); +#ifndef NDEBUG + // Explicitly check for AST recursiveness. + for (size_t i = 0 ; i < kv_size(ast_stack) - 1 ; i++) { + assert(*kv_A(ast_stack, i) != *cur_node); + } +#endif + if (*cur_node == NULL) { + assert(kv_size(ast_stack) == 1); + kv_drop(ast_stack, 1); + } else if ((*cur_node)->children != NULL) { +#ifndef NDEBUG + const uint8_t maxchildren = node_maxchildren[(*cur_node)->type]; + assert(maxchildren > 0); + assert(maxchildren <= 2); + assert(maxchildren == 1 + ? (*cur_node)->children->next == NULL + : ((*cur_node)->children->next == NULL + || (*cur_node)->children->next->next == NULL)); +#endif + kvi_push(ast_stack, &(*cur_node)->children); + } else if ((*cur_node)->next != NULL) { + kvi_push(ast_stack, &(*cur_node)->next); + } else if (*cur_node != NULL) { + kv_drop(ast_stack, 1); + switch ((*cur_node)->type) { + case kExprNodeDoubleQuotedString: + case kExprNodeSingleQuotedString: { + xfree((*cur_node)->data.str.value); + break; + } + case kExprNodeMissing: + case kExprNodeOpMissing: + case kExprNodeTernary: + case kExprNodeTernaryValue: + case kExprNodeRegister: + case kExprNodeSubscript: + case kExprNodeListLiteral: + case kExprNodeUnaryPlus: + case kExprNodeBinaryPlus: + case kExprNodeNested: + case kExprNodeCall: + case kExprNodePlainIdentifier: + case kExprNodePlainKey: + case kExprNodeComplexIdentifier: + case kExprNodeUnknownFigure: + case kExprNodeLambda: + case kExprNodeDictLiteral: + case kExprNodeCurlyBracesIdentifier: + case kExprNodeAssignment: + case kExprNodeComma: + case kExprNodeColon: + case kExprNodeArrow: + case kExprNodeComparison: + case kExprNodeConcat: + case kExprNodeConcatOrSubscript: + case kExprNodeInteger: + case kExprNodeFloat: + case kExprNodeOr: + case kExprNodeAnd: + case kExprNodeUnaryMinus: + case kExprNodeBinaryMinus: + case kExprNodeNot: + case kExprNodeMultiplication: + case kExprNodeDivision: + case kExprNodeMod: + case kExprNodeOption: + case kExprNodeEnvironment: { + break; + } + } + xfree(*cur_node); + *cur_node = NULL; + } + } + kvi_destroy(ast_stack); +} + +// Binary operator precedence and associativity: +// +// Operator | Precedence | Associativity +// ---------+------------+----------------- +// || | 2 | left +// && | 3 | left +// cmp* | 4 | not associative +// + - . | 5 | left +// * / % | 6 | left +// +// * comparison operators: +// +// == ==# ==? != !=# !=? +// =~ =~# =~? !~ !~# !~? +// > ># >? <= <=# <=? +// < <# <? >= >=# >=? +// is is# is? isnot isnot# isnot? + +/// Allocate a new node and set some of the values +/// +/// @param[in] type Node type to allocate. +/// @param[in] level Node level to allocate +static inline ExprASTNode *viml_pexpr_new_node(const ExprASTNodeType type) + FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_MALLOC +{ + ExprASTNode *ret = xmalloc(sizeof(*ret)); + ret->type = type; + ret->children = NULL; + ret->next = NULL; + return ret; +} + +static struct { + ExprOpLvl lvl; + ExprOpAssociativity ass; +} node_type_to_node_props[] = { + [kExprNodeMissing] = { kEOpLvlInvalid, kEOpAssNo, }, + [kExprNodeOpMissing] = { kEOpLvlMultiplication, kEOpAssNo }, + + [kExprNodeNested] = { kEOpLvlParens, kEOpAssNo }, + // Note: below nodes are kEOpLvlSubscript for “binary operator” itself, but + // kEOpLvlParens when it comes to inside the parenthesis. + [kExprNodeCall] = { kEOpLvlParens, kEOpAssNo }, + [kExprNodeSubscript] = { kEOpLvlParens, kEOpAssNo }, + + [kExprNodeUnknownFigure] = { kEOpLvlParens, kEOpAssLeft }, + [kExprNodeLambda] = { kEOpLvlParens, kEOpAssNo }, + [kExprNodeDictLiteral] = { kEOpLvlParens, kEOpAssNo }, + [kExprNodeListLiteral] = { kEOpLvlParens, kEOpAssNo }, + + [kExprNodeArrow] = { kEOpLvlArrow, kEOpAssNo }, + + // Right associativity for comma because this means easier access to arguments + // list, etc: for "[a, b, c, d]" you can access "a" in one step if it is + // represented as "list(comma(a, comma(b, comma(c, d))))" then if it is + // "list(comma(comma(comma(a, b), c), d))" in which case you will need to + // traverse all three comma() structures. And with comma operator (including + // actual comma operator from C which is not present in VimL) nobody cares + // about associativity, only about order of execution. + [kExprNodeComma] = { kEOpLvlComma, kEOpAssRight }, + + // Colons are not eligible for chaining, so nobody cares about associativity. + [kExprNodeColon] = { kEOpLvlColon, kEOpAssNo }, + + [kExprNodeTernary] = { kEOpLvlTernary, kEOpAssRight }, + + [kExprNodeOr] = { kEOpLvlOr, kEOpAssLeft }, + + [kExprNodeAnd] = { kEOpLvlAnd, kEOpAssLeft }, + + [kExprNodeTernaryValue] = { kEOpLvlTernaryValue, kEOpAssRight }, + + [kExprNodeComparison] = { kEOpLvlComparison, kEOpAssRight }, + + [kExprNodeBinaryPlus] = { kEOpLvlAddition, kEOpAssLeft }, + [kExprNodeBinaryMinus] = { kEOpLvlAddition, kEOpAssLeft }, + [kExprNodeConcat] = { kEOpLvlAddition, kEOpAssLeft }, + + [kExprNodeMultiplication] = { kEOpLvlMultiplication, kEOpAssLeft }, + [kExprNodeDivision] = { kEOpLvlMultiplication, kEOpAssLeft }, + [kExprNodeMod] = { kEOpLvlMultiplication, kEOpAssLeft }, + + [kExprNodeUnaryPlus] = { kEOpLvlUnary, kEOpAssNo }, + [kExprNodeUnaryMinus] = { kEOpLvlUnary, kEOpAssNo }, + [kExprNodeNot] = { kEOpLvlUnary, kEOpAssNo }, + + [kExprNodeConcatOrSubscript] = { kEOpLvlSubscript, kEOpAssLeft }, + + [kExprNodeCurlyBracesIdentifier] = { kEOpLvlComplexIdentifier, kEOpAssLeft }, + + [kExprNodeAssignment] = { kEOpLvlAssignment, kEOpAssLeft }, + + [kExprNodeComplexIdentifier] = { kEOpLvlValue, kEOpAssLeft }, + + [kExprNodePlainIdentifier] = { kEOpLvlValue, kEOpAssNo }, + [kExprNodePlainKey] = { kEOpLvlValue, kEOpAssNo }, + [kExprNodeRegister] = { kEOpLvlValue, kEOpAssNo }, + [kExprNodeInteger] = { kEOpLvlValue, kEOpAssNo }, + [kExprNodeFloat] = { kEOpLvlValue, kEOpAssNo }, + [kExprNodeDoubleQuotedString] = { kEOpLvlValue, kEOpAssNo }, + [kExprNodeSingleQuotedString] = { kEOpLvlValue, kEOpAssNo }, + [kExprNodeOption] = { kEOpLvlValue, kEOpAssNo }, + [kExprNodeEnvironment] = { kEOpLvlValue, kEOpAssNo }, +}; + +/// Get AST node priority level +/// +/// Used primary to reduce line length, so keep the name short. +/// +/// @param[in] node Node to get priority for. +/// +/// @return Node priority level. +static inline ExprOpLvl node_lvl(const ExprASTNode node) + FUNC_ATTR_ALWAYS_INLINE FUNC_ATTR_CONST FUNC_ATTR_WARN_UNUSED_RESULT +{ + return node_type_to_node_props[node.type].lvl; +} + +/// Get AST node associativity, to be used for operator nodes primary +/// +/// Used primary to reduce line length, so keep the name short. +/// +/// @param[in] node Node to get priority for. +/// +/// @return Node associativity. +static inline ExprOpAssociativity node_ass(const ExprASTNode node) + FUNC_ATTR_ALWAYS_INLINE FUNC_ATTR_CONST FUNC_ATTR_WARN_UNUSED_RESULT +{ + return node_type_to_node_props[node.type].ass; +} + +/// Handle binary operator +/// +/// This function is responsible for handling priority levels as well. +/// +/// @param[in] pstate Parser state, used for error reporting. +/// @param ast_stack AST stack. May be popped of some values and will +/// definitely receive new ones. +/// @param bop_node New node to handle. +/// @param[out] want_node_p New value of want_node. +/// @param[out] ast_err Location where error is saved, if any. +/// +/// @return True if no errors occurred, false otherwise. +static bool viml_pexpr_handle_bop(const ParserState *const pstate, + ExprASTStack *const ast_stack, + ExprASTNode *const bop_node, + ExprASTWantedNode *const want_node_p, + ExprASTError *const ast_err) + FUNC_ATTR_NONNULL_ALL +{ + bool ret = true; + ExprASTNode **top_node_p = NULL; + ExprASTNode *top_node; + ExprOpLvl top_node_lvl; + ExprOpAssociativity top_node_ass; + assert(kv_size(*ast_stack)); + const ExprOpLvl bop_node_lvl = ((bop_node->type == kExprNodeCall + || bop_node->type == kExprNodeSubscript) + ? kEOpLvlSubscript + : node_lvl(*bop_node)); +#ifndef NDEBUG + const ExprOpAssociativity bop_node_ass = ( + (bop_node->type == kExprNodeCall + || bop_node->type == kExprNodeSubscript) + ? kEOpAssLeft + : node_ass(*bop_node)); +#endif + do { + ExprASTNode **new_top_node_p = kv_last(*ast_stack); + ExprASTNode *new_top_node = *new_top_node_p; + assert(new_top_node != NULL); + const ExprOpLvl new_top_node_lvl = node_lvl(*new_top_node); + const ExprOpAssociativity new_top_node_ass = node_ass(*new_top_node); + assert(bop_node_lvl != new_top_node_lvl + || bop_node_ass == new_top_node_ass); + if (top_node_p != NULL + && ((bop_node_lvl > new_top_node_lvl + || (bop_node_lvl == new_top_node_lvl + && new_top_node_ass == kEOpAssNo)))) { + break; + } + kv_drop(*ast_stack, 1); + top_node_p = new_top_node_p; + top_node = new_top_node; + top_node_lvl = new_top_node_lvl; + top_node_ass = new_top_node_ass; + if (bop_node_lvl == top_node_lvl && top_node_ass == kEOpAssRight) { + break; + } + } while (kv_size(*ast_stack)); + if (top_node_ass == kEOpAssLeft || top_node_lvl != bop_node_lvl) { + // outer(op(x,y)) -> outer(new_op(op(x,y),*)) + // + // Before: top_node_p = outer(*), points to op(x,y) + // Other stack elements unknown + // + // After: top_node_p = outer(*), points to new_op(op(x,y)) + // &bop_node->children->next = new_op(op(x,y),*), points to NULL + *top_node_p = bop_node; + bop_node->children = top_node; + assert(bop_node->children->next == NULL); + kvi_push(*ast_stack, top_node_p); + kvi_push(*ast_stack, &bop_node->children->next); + } else { + assert(top_node_lvl == bop_node_lvl && top_node_ass == kEOpAssRight); + assert(top_node->children != NULL && top_node->children->next != NULL); + // outer(op(x,y)) -> outer(op(x,new_op(y,*))) + // + // Before: top_node_p = outer(*), points to op(x,y) + // Other stack elements unknown + // + // After: top_node_p = outer(*), points to op(x,new_op(y)) + // &top_node->children->next = op(x,*), points to new_op(y) + // &bop_node->children->next = new_op(y,*), points to NULL + bop_node->children = top_node->children->next; + top_node->children->next = bop_node; + assert(bop_node->children->next == NULL); + kvi_push(*ast_stack, top_node_p); + kvi_push(*ast_stack, &top_node->children->next); + kvi_push(*ast_stack, &bop_node->children->next); + // TODO(ZyX-I): Make this not error, but treat like Python does + if (bop_node->type == kExprNodeComparison) { + east_set_error(pstate, ast_err, + _("E15: Operator is not associative: %.*s"), + bop_node->start); + ret = false; + } + } + *want_node_p = kENodeValue; + return ret; +} + +/// ParserPosition literal based on ParserPosition pos with columns shifted +/// +/// Function does not check whether resulting position is valid. +/// +/// @param[in] pos Position to shift. +/// @param[in] shift Number of bytes to shift. +/// +/// @return Shifted position. +static inline ParserPosition shifted_pos(const ParserPosition pos, + const size_t shift) + FUNC_ATTR_CONST FUNC_ATTR_ALWAYS_INLINE FUNC_ATTR_WARN_UNUSED_RESULT +{ + return (ParserPosition) { .line = pos.line, .col = pos.col + shift }; +} + +/// ParserPosition literal based on ParserPosition pos with specified column +/// +/// Function does not check whether remaining position is valid. +/// +/// @param[in] pos Position to adjust. +/// @param[in] new_col New column. +/// +/// @return Shifted position. +static inline ParserPosition recol_pos(const ParserPosition pos, + const size_t new_col) + FUNC_ATTR_CONST FUNC_ATTR_ALWAYS_INLINE FUNC_ATTR_WARN_UNUSED_RESULT +{ + return (ParserPosition) { .line = pos.line, .col = new_col }; +} + +/// Get highlight group name +#define HL(g) (is_invalid ? "NvimInvalid" #g : "Nvim" #g) + +/// Highlight current token with the given group +#define HL_CUR_TOKEN(g) \ + viml_parser_highlight(pstate, cur_token.start, cur_token.len, \ + HL(g)) + +/// Allocate new node, saving some values +#define NEW_NODE(type) \ + viml_pexpr_new_node(type) + +/// Set position of the given node to position from the given token +/// +/// @param cur_node Node to modify. +/// @param cur_token Token to set position from. +#define POS_FROM_TOKEN(cur_node, cur_token) \ + do { \ + (cur_node)->start = cur_token.start; \ + (cur_node)->len = cur_token.len; \ + } while (0) + +/// Allocate new node and set its position from the current token +/// +/// If previous token happened to contain spacing then it will be included. +/// +/// @param cur_node Variable to save allocated node to. +/// @param typ Node type. +#define NEW_NODE_WITH_CUR_POS(cur_node, typ) \ + do { \ + (cur_node) = NEW_NODE(typ); \ + POS_FROM_TOKEN((cur_node), cur_token); \ + if (prev_token.type == kExprLexSpacing) { \ + (cur_node)->start = prev_token.start; \ + (cur_node)->len += prev_token.len; \ + } \ + } while (0) + +/// Check whether it is possible to have next expression after current +/// +/// For :echo: `:echo @a @a` is a valid expression. `:echo (@a @a)` is not. +#define MAY_HAVE_NEXT_EXPR \ + (kv_size(ast_stack) == 1) + +/// Add operator node +/// +/// @param[in] cur_node Node to add. +#define ADD_OP_NODE(cur_node) \ + is_invalid |= !viml_pexpr_handle_bop(pstate, &ast_stack, cur_node, \ + &want_node, &ast.err) + +/// Record missing operator: for things like +/// +/// :echo @a @a +/// +/// (allowed) or +/// +/// :echo (@a @a) +/// +/// (parsed as OpMissing(@a, @a)). +#define OP_MISSING \ + do { \ + if (flags & kExprFlagsMulti && MAY_HAVE_NEXT_EXPR) { \ + /* Multiple expressions allowed, return without calling */ \ + /* viml_parser_advance(). */ \ + goto viml_pexpr_parse_end; \ + } else { \ + assert(*top_node_p != NULL); \ + ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E15: Missing operator: %.*s")); \ + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeOpMissing); \ + cur_node->len = 0; \ + ADD_OP_NODE(cur_node); \ + goto viml_pexpr_parse_process_token; \ + } \ + } while (0) + +/// Record missing value: for things like "* 5" +/// +/// @param[in] msg Error message. +#define ADD_VALUE_IF_MISSING(msg) \ + do { \ + if (want_node == kENodeValue) { \ + ERROR_FROM_TOKEN_AND_MSG(cur_token, (msg)); \ + NEW_NODE_WITH_CUR_POS((*top_node_p), kExprNodeMissing); \ + (*top_node_p)->len = 0; \ + want_node = kENodeOperator; \ + } \ + } while (0) + +/// Set AST error, unless AST already is not correct +/// +/// @param[out] ret_ast AST to set error in. +/// @param[in] pstate Parser state, used to get error message argument. +/// @param[in] msg Error message, assumed to be already translated and +/// containing a single %token "%.*s". +/// @param[in] start Position at which error occurred. +static inline void east_set_error(const ParserState *const pstate, + ExprASTError *const ret_ast_err, + const char *const msg, + const ParserPosition start) + FUNC_ATTR_NONNULL_ALL FUNC_ATTR_ALWAYS_INLINE +{ + if (ret_ast_err->msg != NULL) { + return; + } + const ParserLine pline = pstate->reader.lines.items[start.line]; + ret_ast_err->msg = msg; + ret_ast_err->arg_len = (int)(pline.size - start.col); + ret_ast_err->arg = pline.data + start.col; +} + +/// Set error from the given token and given message +#define ERROR_FROM_TOKEN_AND_MSG(cur_token, msg) \ + do { \ + is_invalid = true; \ + east_set_error(pstate, &ast.err, msg, cur_token.start); \ + } while (0) + +/// Like #ERROR_FROM_TOKEN_AND_MSG, but gets position from a node +#define ERROR_FROM_NODE_AND_MSG(node, msg) \ + do { \ + is_invalid = true; \ + east_set_error(pstate, &ast.err, msg, node->start); \ + } while (0) + +/// Set error from the given kExprLexInvalid token +#define ERROR_FROM_TOKEN(cur_token) \ + ERROR_FROM_TOKEN_AND_MSG(cur_token, cur_token.data.err.msg) + +/// Select figure brace type, altering highlighting as well if needed +/// +/// @param[out] node Node to modify type. +/// @param[in] new_type New type, one of ExprASTNodeType values without +/// kExprNode prefix. +/// @param[in] hl Corresponding highlighting, passed as an argument to #HL. +#define SELECT_FIGURE_BRACE_TYPE(node, new_type, hl) \ + do { \ + ExprASTNode *const node_ = (node); \ + assert(node_->type == kExprNodeUnknownFigure \ + || node_->type == kExprNode##new_type); \ + node_->type = kExprNode##new_type; \ + if (pstate->colors) { \ + kv_A(*pstate->colors, node_->data.fig.opening_hl_idx).group = \ + HL(hl); \ + } \ + } while (0) + +/// Add identifier which should constitute complex identifier node +/// +/// This one is to be called only in case want_node is kENodeOperator. +/// +/// @param new_ident_node_code Code used to create a new identifier node and +/// update want_node and ast_stack, without +/// a trailing semicolon. +/// @param hl Highlighting name to use, passed as an argument to #HL. +#define ADD_IDENT(new_ident_node_code, hl) \ + do { \ + assert(want_node == kENodeOperator); \ + /* Operator: may only be curly braces name, but only under certain */ \ + /* conditions. */ \ +\ + /* First condition is that there is no space before a part of complex */ \ + /* identifier. */ \ + if (prev_token.type == kExprLexSpacing) { \ + OP_MISSING; \ + } \ + switch ((*top_node_p)->type) { \ + /* Second is that previous node is one of the identifiers: */ \ + /* complex, plain, curly braces. */ \ +\ + /* TODO(ZyX-I): Extend syntax to allow ${expr}. This is needed to */ \ + /* handle environment variables like those bash uses for */ \ + /* `export -f`: their names consist not only of alphanumeric */ \ + /* characetrs. */ \ + case kExprNodeComplexIdentifier: \ + case kExprNodePlainIdentifier: \ + case kExprNodeCurlyBracesIdentifier: { \ + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeComplexIdentifier); \ + cur_node->len = 0; \ + cur_node->children = *top_node_p; \ + *top_node_p = cur_node; \ + kvi_push(ast_stack, &cur_node->children->next); \ + ExprASTNode **const new_top_node_p = kv_last(ast_stack); \ + assert(*new_top_node_p == NULL); \ + new_ident_node_code; \ + *new_top_node_p = cur_node; \ + HL_CUR_TOKEN(hl); \ + break; \ + } \ + default: { \ + OP_MISSING; \ + break; \ + } \ + } \ + } while (0) + +/// Determine whether given parse type is an assignment +/// +/// @param[in] pt Checked parse type. +/// +/// @return true if parsing an assignment, false otherwise. +static inline bool pt_is_assignment(const ExprASTParseType pt) + FUNC_ATTR_ALWAYS_INLINE FUNC_ATTR_CONST FUNC_ATTR_WARN_UNUSED_RESULT +{ + return (pt == kEPTAssignment || pt == kEPTSingleAssignment); +} + +/// Structure used to define “string shifts” necessary to map string +/// highlighting to actual strings. +typedef struct { + size_t start; ///< Where special character starts in original string. + size_t orig_len; ///< Length of orininal string (e.g. 4 for "\x80"). + size_t act_len; ///< Length of resulting character(s) (e.g. 1 for "\x80"). + bool escape_not_known; ///< True if escape sequence in original is not known. +} StringShift; + +/// Parse and highlight single- or double-quoted string +/// +/// Function is supposed to detect and highlight regular expressions (but does +/// not do now). +/// +/// @param[out] pstate Parser state which also contains a place where +/// highlighting is saved. +/// @param[out] node Node where string parsing results are saved. +/// @param[in] token Token to highlight. +/// @param[in] ast_stack Parser AST stack, used to detect whether current +/// string is a regex. +/// @param[in] is_invalid Whether currently processed token is not valid. +static void parse_quoted_string(ParserState *const pstate, + ExprASTNode *const node, + const LexExprToken token, + const ExprASTStack ast_stack, + const bool is_invalid) + FUNC_ATTR_NONNULL_ALL +{ + const ParserLine pline = pstate->reader.lines.items[token.start.line]; + const char *const s = pline.data + token.start.col; + const char *const e = s + token.len - token.data.str.closed; + const char *p = s + 1; + const bool is_double = (token.type == kExprLexDoubleQuotedString); + size_t size = token.len - token.data.str.closed - 1; + kvec_withinit_t(StringShift, 16) shifts; + kvi_init(shifts); + if (!is_double) { + viml_parser_highlight(pstate, token.start, 1, HL(SingleQuote)); + while (p < e) { + const char *const chunk_e = memchr(p, '\'', (size_t)(e - p)); + if (chunk_e == NULL) { + break; + } + size--; + p = chunk_e + 2; + if (pstate->colors) { + kvi_push(shifts, ((StringShift) { + .start = token.start.col + (size_t)(chunk_e - s), + .orig_len = 2, + .act_len = 1, + .escape_not_known = false, + })); + } + } + node->data.str.size = size; + if (size == 0) { + node->data.str.value = NULL; + } else { + char *v_p; + v_p = node->data.str.value = xmallocz(size); + p = s + 1; + while (p < e) { + const char *const chunk_e = memchr(p, '\'', (size_t)(e - p)); + if (chunk_e == NULL) { + memcpy(v_p, p, (size_t)(e - p)); + break; + } + memcpy(v_p, p, (size_t)(chunk_e - p)); + v_p += (size_t)(chunk_e - p) + 1; + v_p[-1] = '\''; + p = chunk_e + 2; + } + } + } else { + viml_parser_highlight(pstate, token.start, 1, HL(DoubleQuote)); + for (p = s + 1; p < e; p++) { + if (*p == '\\' && p + 1 < e) { + p++; + if (p + 1 == e) { + size--; + break; + } + switch (*p) { + // A "\<x>" form occupies at least 4 characters, and produces up to + // 6 characters: reserve space for 2 extra, but do not compute actual + // length just now, it would be costy. + case '<': { + size += 2; + break; + } + // Hexadecimal, always single byte, but at least three bytes each. + case 'x': case 'X': { + size--; + if (ascii_isxdigit(p[1])) { + size--; + if (p + 2 < e && ascii_isxdigit(p[2])) { + size--; + } + } + break; + } + // Unicode + // + // \uF takes 1 byte which is 2 bytes less then escape sequence. + // \uFF: 2 bytes, 2 bytes less. + // \uFFF: 3 bytes, 2 bytes less. + // \uFFFF: 3 bytes, 3 bytes less. + // \UFFFFF: 4 bytes, 3 bytes less. + // \UFFFFFF: 5 bytes, 3 bytes less. + // \UFFFFFFF: 6 bytes, 3 bytes less. + // \U7FFFFFFF: 6 bytes, 4 bytes less. + case 'u': case 'U': { + const char *const esc_start = p; + size_t n = (*p == 'u' ? 4 : 8); + int nr = 0; + p++; + while (p + 1 < e && n-- && ascii_isxdigit(p[1])) { + p++; + nr = (nr << 4) + hex2nr(*p); + } + // Escape length: (esc_start - 1) points to "\\", esc_start to "u" + // or "U", p to the byte after last byte. So escape sequence + // occupies p - (esc_start - 1), but it stands for a utf_char2len + // bytes. + size -= (size_t)((p - (esc_start - 1)) - utf_char2len(nr)); + p--; + break; + } + // Octal, always single byte, but at least two bytes each. + case '0': case '1': case '2': case '3': case '4': case '5': case '6': + case '7': { + size--; + p++; + if (*p >= '0' && *p <= '7') { + size--; + p++; + if (p < e && *p >= '0' && *p <= '7') { + size--; + p++; + } + } + break; + } + default: { + size--; + break; + } + } + } + } + if (size == 0) { + node->data.str.value = NULL; + node->data.str.size = 0; + } else { + char *v_p; + v_p = node->data.str.value = xmalloc(size); + p = s + 1; + while (p < e) { + const char *const chunk_e = memchr(p, '\\', (size_t)(e - p)); + if (chunk_e == NULL) { + memcpy(v_p, p, (size_t)(e - p)); + v_p += e - p; + break; + } + memcpy(v_p, p, (size_t)(chunk_e - p)); + v_p += (size_t)(chunk_e - p); + p = chunk_e + 1; + if (p == e) { + *v_p++ = '\\'; + break; + } + bool is_unknown = false; + const char *const v_p_start = v_p; + switch (*p) { +#define SINGLE_CHAR_ESC(ch, real_ch) \ + case ch: { \ + *v_p++ = real_ch; \ + p++; \ + break; \ + } + SINGLE_CHAR_ESC('b', BS) + SINGLE_CHAR_ESC('e', ESC) + SINGLE_CHAR_ESC('f', FF) + SINGLE_CHAR_ESC('n', NL) + SINGLE_CHAR_ESC('r', CAR) + SINGLE_CHAR_ESC('t', TAB) + SINGLE_CHAR_ESC('"', '"') + SINGLE_CHAR_ESC('\\', '\\') +#undef SINGLE_CHAR_ESC + + // Hexadecimal or unicode. + case 'X': case 'x': case 'u': case 'U': { + if (p + 1 < e && ascii_isxdigit(p[1])) { + size_t n; + int nr; + bool is_hex = (*p == 'x' || *p == 'X'); + + if (is_hex) { + n = 2; + } else if (*p == 'u') { + n = 4; + } else { + n = 8; + } + nr = 0; + while (p + 1 < e && n-- && ascii_isxdigit(p[1])) { + p++; + nr = (nr << 4) + hex2nr(*p); + } + p++; + if (is_hex) { + *v_p++ = (char)nr; + } else { + v_p += utf_char2bytes(nr, (char_u *)v_p); + } + } else { + is_unknown = true; + *v_p++ = *p; + p++; + } + break; + } + // Octal: "\1", "\12", "\123". + case '0': case '1': case '2': case '3': case '4': case '5': case '6': + case '7': { + uint8_t ch = (uint8_t)(*p++ - '0'); + if (p < e && *p >= '0' && *p <= '7') { + ch = (uint8_t)((ch << 3) + *p++ - '0'); + if (p < e && *p >= '0' && *p <= '7') { + ch = (uint8_t)((ch << 3) + *p++ - '0'); + } + } + *v_p++ = (char)ch; + break; + } + // Special key, e.g.: "\<C-W>" + case '<': { + const size_t special_len = ( + trans_special((const char_u **)&p, (size_t)(e - p), + (char_u *)v_p, true, true)); + if (special_len != 0) { + v_p += special_len; + } else { + is_unknown = true; + mb_copy_char((const char_u **)&p, (char_u **)&v_p); + } + break; + } + default: { + is_unknown = true; + mb_copy_char((const char_u **)&p, (char_u **)&v_p); + break; + } + } + if (pstate->colors) { + kvi_push(shifts, ((StringShift) { + .start = token.start.col + (size_t)(chunk_e - s), + .orig_len = (size_t)(p - chunk_e), + .act_len = (size_t)(v_p - (char *)v_p_start), + .escape_not_known = is_unknown, + })); + } + } + node->data.str.size = (size_t)(v_p - node->data.str.value); + } + } + if (pstate->colors) { + // TODO(ZyX-I): use ast_stack to determine and highlight regular expressions + // TODO(ZyX-I): use ast_stack to determine and highlight printf format str + // TODO(ZyX-I): use ast_stack to determine and highlight expression strings + size_t next_col = token.start.col + 1; + const char *const body_str = (is_double + ? HL(DoubleQuotedBody) + : HL(SingleQuotedBody)); + const char *const esc_str = (is_double + ? HL(DoubleQuotedEscape) + : HL(SingleQuotedQuote)); + const char *const ukn_esc_str = (is_double + ? HL(DoubleQuotedUnknownEscape) + : HL(SingleQuotedUnknownEscape)); + for (size_t i = 0; i < kv_size(shifts); i++) { + const StringShift cur_shift = kv_A(shifts, i); + if (cur_shift.start > next_col) { + viml_parser_highlight(pstate, recol_pos(token.start, next_col), + cur_shift.start - next_col, + body_str); + } + viml_parser_highlight(pstate, recol_pos(token.start, cur_shift.start), + cur_shift.orig_len, + (cur_shift.escape_not_known + ? ukn_esc_str + : esc_str)); + next_col = cur_shift.start + cur_shift.orig_len; + } + if (next_col - token.start.col < token.len - token.data.str.closed) { + viml_parser_highlight(pstate, recol_pos(token.start, next_col), + (token.start.col + + token.len + - token.data.str.closed + - next_col), + body_str); + } + } + if (token.data.str.closed) { + if (is_double) { + viml_parser_highlight(pstate, shifted_pos(token.start, token.len - 1), + 1, HL(DoubleQuote)); + } else { + viml_parser_highlight(pstate, shifted_pos(token.start, token.len - 1), + 1, HL(SingleQuote)); + } + } + kvi_destroy(shifts); +} + +/// Additional flags to pass to lexer depending on want_node +static const int want_node_to_lexer_flags[] = { + [kENodeValue] = kELFlagIsNotCmp, + [kENodeOperator] = kELFlagForbidScope, +}; + +/// Number of characters to highlight as NumberPrefix depending on the base +static const uint8_t base_to_prefix_length[] = { + [2] = 2, + [8] = 1, + [10] = 0, + [16] = 2, +}; + +/// Parse one VimL expression +/// +/// @param pstate Parser state. +/// @param[in] flags Additional flags, see ExprParserFlags +/// +/// @return Parsed AST. +ExprAST viml_pexpr_parse(ParserState *const pstate, const int flags) + FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL +{ + ExprAST ast = { + .err = { + .msg = NULL, + .arg_len = 0, + .arg = NULL, + }, + .root = NULL, + }; + // Expression stack contains current branch in AST tree: that is + // - Stack item 0 contains root of the tree, i.e. &ast->root. + // - Stack item i points to the previous stack items’ last child. + // + // When parser expects “value” node that is something like identifier or "[" + // (list start) last stack item contains NULL. Otherwise last stack item is + // supposed to contain last “finished” value: e.g. "1" or "+(1, 1)" (node + // representing "1+1"). + ExprASTStack ast_stack; + kvi_init(ast_stack); + kvi_push(ast_stack, &ast.root); + ExprASTWantedNode want_node = kENodeValue; + ExprASTParseTypeStack pt_stack; + kvi_init(pt_stack); + kvi_push(pt_stack, kEPTExpr); + if (flags & kExprFlagsParseLet) { + kvi_push(pt_stack, kEPTAssignment); + } + LexExprToken prev_token = { .type = kExprLexMissing }; + bool highlighted_prev_spacing = false; + // Lambda node, valid when parsing lambda arguments only. + ExprASTNode *lambda_node = NULL; + size_t asgn_level = 0; + do { + const bool is_concat_or_subscript = ( + want_node == kENodeValue + && kv_size(ast_stack) > 1 + && (*kv_Z(ast_stack, 1))->type == kExprNodeConcatOrSubscript); + const int lexer_additional_flags = ( + kELFlagPeek + | ((flags & kExprFlagsDisallowEOC) ? kELFlagForbidEOC : 0) + | ((want_node == kENodeValue + && (kv_size(ast_stack) == 1 + || ((*kv_Z(ast_stack, 1))->type != kExprNodeConcat + && ((*kv_Z(ast_stack, 1))->type + != kExprNodeConcatOrSubscript)))) + ? kELFlagAllowFloat + : 0)); + LexExprToken cur_token = viml_pexpr_next_token( + pstate, want_node_to_lexer_flags[want_node] | lexer_additional_flags); + if (cur_token.type == kExprLexEOC) { + break; + } + LexExprTokenType tok_type = cur_token.type; + const bool token_invalid = (tok_type == kExprLexInvalid); + bool is_invalid = token_invalid; +viml_pexpr_parse_process_token: + // May use different flags this time. + cur_token = viml_pexpr_next_token( + pstate, want_node_to_lexer_flags[want_node] | lexer_additional_flags); + if (tok_type == kExprLexSpacing) { + if (is_invalid) { + HL_CUR_TOKEN(Spacing); + } else { + // Do not do anything: let regular spacing be highlighted as normal. + // This also allows later to highlight spacing as invalid. + } + goto viml_pexpr_parse_cycle_end; + } else if (is_invalid && prev_token.type == kExprLexSpacing + && !highlighted_prev_spacing) { + viml_parser_highlight(pstate, prev_token.start, prev_token.len, + HL(Spacing)); + is_invalid = false; + highlighted_prev_spacing = true; + } + const ParserLine pline = pstate->reader.lines.items[cur_token.start.line]; + ExprASTNode **const top_node_p = kv_last(ast_stack); + assert(kv_size(ast_stack) >= 1); + ExprASTNode *cur_node = NULL; +#ifndef NDEBUG + const bool want_value = (want_node == kENodeValue); + assert(want_value == (*top_node_p == NULL)); + assert(kv_A(ast_stack, 0) == &ast.root); + // Check that stack item i + 1 points to stack items’ i *last* child. + for (size_t i = 0; i + 1 < kv_size(ast_stack); i++) { + const bool item_null = (want_value && i + 2 == kv_size(ast_stack)); + assert((&(*kv_A(ast_stack, i))->children == kv_A(ast_stack, i + 1) + && (item_null + ? (*kv_A(ast_stack, i))->children == NULL + : (*kv_A(ast_stack, i))->children->next == NULL)) + || ((&(*kv_A(ast_stack, i))->children->next + == kv_A(ast_stack, i + 1)) + && (item_null + ? (*kv_A(ast_stack, i))->children->next == NULL + : (*kv_A(ast_stack, i))->children->next->next == NULL))); + } +#endif + // Note: in Vim whether expression "cond?d.a:2" is valid depends both on + // "cond" and whether "d" is a dictionary: expression is valid if condition + // is true and "d" is a dictionary (with "a" key or it will complain about + // missing one, but this is not relevant); if any of the requirements is + // broken then this thing is parsed as "d . a:2" yielding missing colon + // error. This parser does not allow such ambiguity, especially because it + // simply can’t: whether "d" is a dictionary is not known at the parsing + // time. + // + // Here example will always contain a concat with "a:2" sucking colon, + // making expression invalid both because there is no longer a spare colon + // for ternary and because concatenating dictionary with anything is not + // valid. There are more cases when this will make a difference though. + const bool node_is_key = ( + is_concat_or_subscript + && (cur_token.type == kExprLexPlainIdentifier + ? (!cur_token.data.var.autoload + && cur_token.data.var.scope == kExprVarScopeMissing) + : (cur_token.type == kExprLexNumber)) + && prev_token.type != kExprLexSpacing); + if (is_concat_or_subscript && !node_is_key) { + // Note: in Vim "d. a" (this is the reason behind `prev_token.type != + // kExprLexSpacing` part of the condition) as well as any other "d.{expr}" + // where "{expr}" does not look like a key is invalid whenever "d" happens + // to be a dictionary. Since parser has no idea whether preceding + // expression is actually a dictionary it can’t outright reject anything, + // so it turns kExprNodeConcatOrSubscript into kExprNodeConcat instead, + // which will yield different errors then Vim does in a number of + // circumstances, and in any case runtime and not parse time errors. + (*kv_Z(ast_stack, 1))->type = kExprNodeConcat; + } + // Pop some stack pt_stack items in case of misplaced nodes. + const bool is_single_assignment = kv_last(pt_stack) == kEPTSingleAssignment; + switch (kv_last(pt_stack)) { + case kEPTExpr: { + break; + } + case kEPTLambdaArguments: { + if ((want_node == kENodeOperator + && tok_type != kExprLexComma + && tok_type != kExprLexArrow) + || (want_node == kENodeValue + && !(cur_token.type == kExprLexPlainIdentifier + && cur_token.data.var.scope == kExprVarScopeMissing + && !cur_token.data.var.autoload) + && tok_type != kExprLexArrow)) { + lambda_node->data.fig.type_guesses.allow_lambda = false; + if (lambda_node->children != NULL + && lambda_node->children->type == kExprNodeComma) { + // If lambda has comma child this means that parser has already seen + // at least "{arg1,", so node cannot possibly be anything, but + // lambda. + + // Vim may give E121 or E720 in this case, but it does not look + // right to have either because both are results of reevaluation + // possibly-lambda node as a dictionary and here this is not going + // to happen. + ERROR_FROM_TOKEN_AND_MSG( + cur_token, + _("E15: Expected lambda arguments list or arrow: %.*s")); + } else { + // Else it may appear that possibly-lambda node is actually + // a dictionary or curly-braces-name identifier. + lambda_node = NULL; + kv_drop(pt_stack, 1); + } + } + break; + } + case kEPTSingleAssignment: + case kEPTAssignment: { + if (want_node == kENodeValue + && tok_type != kExprLexBracket + && tok_type != kExprLexPlainIdentifier + && (tok_type != kExprLexFigureBrace || cur_token.data.brc.closing) + && !(node_is_key && tok_type == kExprLexNumber) + && tok_type != kExprLexEnv + && tok_type != kExprLexOption + && tok_type != kExprLexRegister) { + ERROR_FROM_TOKEN_AND_MSG( + cur_token, + _("E15: Expected value part of assignment lvalue: %.*s")); + kv_drop(pt_stack, 1); + } else if (want_node == kENodeOperator + && tok_type != kExprLexBracket + && (tok_type != kExprLexFigureBrace + || cur_token.data.brc.closing) + && tok_type != kExprLexDot + && (tok_type != kExprLexComma || !is_single_assignment) + && tok_type != kExprLexAssignment + // Curly brace identifiers: will contain plain identifier or + // another curly brace in position where operator is wanted. + && !((tok_type == kExprLexPlainIdentifier + || (tok_type == kExprLexFigureBrace + && !cur_token.data.brc.closing)) + && prev_token.type != kExprLexSpacing)) { + if (flags & kExprFlagsMulti && MAY_HAVE_NEXT_EXPR) { + goto viml_pexpr_parse_end; + } + ERROR_FROM_TOKEN_AND_MSG( + cur_token, + _("E15: Expected assignment operator or subscript: %.*s")); + kv_drop(pt_stack, 1); + } + assert(kv_size(pt_stack)); + break; + } + } + assert(kv_size(pt_stack)); + const ExprASTParseType cur_pt = kv_last(pt_stack); + assert(lambda_node == NULL || cur_pt == kEPTLambdaArguments); + switch (tok_type) { + case kExprLexMissing: + case kExprLexSpacing: + case kExprLexEOC: { + assert(false); + } + case kExprLexInvalid: { + ERROR_FROM_TOKEN(cur_token); + tok_type = cur_token.data.err.type; + goto viml_pexpr_parse_process_token; + } + case kExprLexRegister: { + if (want_node == kENodeOperator) { + // Register in operator position: e.g. @a @a + OP_MISSING; + } + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeRegister); + cur_node->data.reg.name = cur_token.data.reg.name; + *top_node_p = cur_node; + want_node = kENodeOperator; + HL_CUR_TOKEN(Register); + break; + } +#define SIMPLE_UB_OP(op) \ + case kExprLex##op: { \ + if (want_node == kENodeValue) { \ + /* Value level: assume unary operator. */ \ + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeUnary##op); \ + *top_node_p = cur_node; \ + kvi_push(ast_stack, &cur_node->children); \ + HL_CUR_TOKEN(Unary##op); \ + } else { \ + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeBinary##op); \ + ADD_OP_NODE(cur_node); \ + HL_CUR_TOKEN(Binary##op); \ + } \ + want_node = kENodeValue; \ + break; \ + } + SIMPLE_UB_OP(Plus) + SIMPLE_UB_OP(Minus) +#undef SIMPLE_UB_OP +#define SIMPLE_B_OP(op, msg) \ + case kExprLex##op: { \ + ADD_VALUE_IF_MISSING(_("E15: Unexpected " msg ": %.*s")); \ + NEW_NODE_WITH_CUR_POS(cur_node, kExprNode##op); \ + HL_CUR_TOKEN(op); \ + ADD_OP_NODE(cur_node); \ + break; \ + } + SIMPLE_B_OP(Or, "or operator") + SIMPLE_B_OP(And, "and operator") +#undef SIMPLE_B_OP + case kExprLexMultiplication: { + ADD_VALUE_IF_MISSING( + _("E15: Unexpected multiplication-like operator: %.*s")); + switch (cur_token.data.mul.type) { +#define MUL_OP(lex_op_tail, node_op_tail) \ + case kExprLexMul##lex_op_tail: { \ + NEW_NODE_WITH_CUR_POS(cur_node, kExprNode##node_op_tail); \ + HL_CUR_TOKEN(node_op_tail); \ + break; \ + } + MUL_OP(Mul, Multiplication) + MUL_OP(Div, Division) + MUL_OP(Mod, Mod) +#undef MUL_OP + } + ADD_OP_NODE(cur_node); + break; + } + case kExprLexOption: { + if (want_node == kENodeOperator) { + OP_MISSING; + } + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeOption); + if (cur_token.type == kExprLexInvalid) { + assert(cur_token.len == 1 + || (cur_token.len == 3 + && pline.data[cur_token.start.col + 2] == ':')); + cur_node->data.opt.ident = ( + pline.data + cur_token.start.col + cur_token.len); + cur_node->data.opt.ident_len = 0; + cur_node->data.opt.scope = ( + cur_token.len == 3 + ? (ExprOptScope)pline.data[cur_token.start.col + 1] + : kExprOptScopeUnspecified); + } else { + cur_node->data.opt.ident = cur_token.data.opt.name; + cur_node->data.opt.ident_len = cur_token.data.opt.len; + cur_node->data.opt.scope = cur_token.data.opt.scope; + } + *top_node_p = cur_node; + want_node = kENodeOperator; + viml_parser_highlight(pstate, cur_token.start, 1, HL(OptionSigil)); + const size_t scope_shift = ( + cur_token.data.opt.scope == kExprOptScopeUnspecified ? 0 : 2); + if (scope_shift) { + viml_parser_highlight(pstate, shifted_pos(cur_token.start, 1), 1, + HL(OptionScope)); + viml_parser_highlight(pstate, shifted_pos(cur_token.start, 2), 1, + HL(OptionScopeDelimiter)); + } + viml_parser_highlight( + pstate, shifted_pos(cur_token.start, scope_shift + 1), + cur_token.len - (scope_shift + 1), HL(OptionName)); + break; + } + case kExprLexEnv: { + if (want_node == kENodeOperator) { + OP_MISSING; + } + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeEnvironment); + cur_node->data.env.ident = pline.data + cur_token.start.col + 1; + cur_node->data.env.ident_len = cur_token.len - 1; + if (cur_node->data.env.ident_len == 0) { + ERROR_FROM_TOKEN_AND_MSG(cur_token, + _("E15: Environment variable name missing")); + } + *top_node_p = cur_node; + want_node = kENodeOperator; + viml_parser_highlight(pstate, cur_token.start, 1, HL(EnvironmentSigil)); + viml_parser_highlight(pstate, shifted_pos(cur_token.start, 1), + cur_token.len - 1, HL(EnvironmentName)); + break; + } + case kExprLexNot: { + if (want_node == kENodeOperator) { + OP_MISSING; + } + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeNot); + *top_node_p = cur_node; + kvi_push(ast_stack, &cur_node->children); + HL_CUR_TOKEN(Not); + break; + } + case kExprLexComparison: { + ADD_VALUE_IF_MISSING( + _("E15: Expected value, got comparison operator: %.*s")); + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeComparison); + if (cur_token.type == kExprLexInvalid) { + cur_node->data.cmp.ccs = kCCStrategyUseOption; + cur_node->data.cmp.type = kExprCmpEqual; + cur_node->data.cmp.inv = false; + } else { + cur_node->data.cmp.ccs = cur_token.data.cmp.ccs; + cur_node->data.cmp.type = cur_token.data.cmp.type; + cur_node->data.cmp.inv = cur_token.data.cmp.inv; + } + ADD_OP_NODE(cur_node); + if (cur_token.data.cmp.ccs != kCCStrategyUseOption) { + viml_parser_highlight(pstate, cur_token.start, cur_token.len - 1, + HL(Comparison)); + viml_parser_highlight( + pstate, shifted_pos(cur_token.start, cur_token.len - 1), 1, + HL(ComparisonModifier)); + } else { + HL_CUR_TOKEN(Comparison); + } + want_node = kENodeValue; + break; + } + case kExprLexComma: { + assert(!(want_node == kENodeValue && cur_pt == kEPTLambdaArguments)); + if (want_node == kENodeValue) { + // Value level: comma appearing here is not valid. + // Note: in Vim string(,x) will give E116, this is not the case here. + ERROR_FROM_TOKEN_AND_MSG( + cur_token, _("E15: Expected value, got comma: %.*s")); + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeMissing); + cur_node->len = 0; + *top_node_p = cur_node; + want_node = kENodeOperator; + } + if (cur_pt == kEPTLambdaArguments) { + assert(lambda_node != NULL); + assert(lambda_node->data.fig.type_guesses.allow_lambda); + SELECT_FIGURE_BRACE_TYPE(lambda_node, Lambda, Lambda); + } + if (kv_size(ast_stack) < 2) { + goto viml_pexpr_parse_invalid_comma; + } + for (size_t i = 1; i < kv_size(ast_stack); i++) { + ExprASTNode *const *const eastnode_p = + (ExprASTNode *const *)kv_Z(ast_stack, i); + const ExprASTNodeType eastnode_type = (*eastnode_p)->type; + const ExprOpLvl eastnode_lvl = node_lvl(**eastnode_p); + if (eastnode_type == kExprNodeLambda) { + assert(cur_pt == kEPTLambdaArguments + && want_node == kENodeOperator); + break; + } else if (eastnode_type == kExprNodeDictLiteral + || eastnode_type == kExprNodeListLiteral + || eastnode_type == kExprNodeCall) { + break; + } else if (eastnode_type == kExprNodeComma + || eastnode_type == kExprNodeColon + || eastnode_lvl > kEOpLvlComma) { + // Do nothing + } else { +viml_pexpr_parse_invalid_comma: + ERROR_FROM_TOKEN_AND_MSG( + cur_token, + _("E15: Comma outside of call, lambda or literal: %.*s")); + break; + } + if (i == kv_size(ast_stack) - 1) { + goto viml_pexpr_parse_invalid_comma; + } + } + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeComma); + ADD_OP_NODE(cur_node); + HL_CUR_TOKEN(Comma); + break; + } +#define EXP_VAL_COLON "E15: Expected value, got colon: %.*s" + case kExprLexColon: { + bool is_ternary = false; + if (kv_size(ast_stack) < 2) { + goto viml_pexpr_parse_invalid_colon; + } + bool can_be_ternary = true; + bool is_subscript = false; + for (size_t i = 1; i < kv_size(ast_stack); i++) { + ExprASTNode *const *const eastnode_p = + (ExprASTNode *const *)kv_Z(ast_stack, i); + const ExprASTNodeType eastnode_type = (*eastnode_p)->type; + const ExprOpLvl eastnode_lvl = node_lvl(**eastnode_p); + STATIC_ASSERT(kEOpLvlTernary > kEOpLvlComma, + "Unexpected operator priorities"); + if (can_be_ternary && eastnode_type == kExprNodeTernaryValue + && !(*eastnode_p)->data.ter.got_colon) { + kv_drop(ast_stack, i); + (*eastnode_p)->start = cur_token.start; + (*eastnode_p)->len = cur_token.len; + if (prev_token.type == kExprLexSpacing) { + (*eastnode_p)->start = prev_token.start; + (*eastnode_p)->len += prev_token.len; + } + is_ternary = true; + (*eastnode_p)->data.ter.got_colon = true; + ADD_VALUE_IF_MISSING(_(EXP_VAL_COLON)); + assert((*eastnode_p)->children != NULL); + assert((*eastnode_p)->children->next == NULL); + kvi_push(ast_stack, &(*eastnode_p)->children->next); + break; + } else if (eastnode_type == kExprNodeUnknownFigure) { + SELECT_FIGURE_BRACE_TYPE(*eastnode_p, DictLiteral, Dict); + break; + } else if (eastnode_type == kExprNodeDictLiteral) { + break; + } else if (eastnode_type == kExprNodeSubscript) { + is_subscript = true; + can_be_ternary = false; + assert(!is_ternary); + break; + } else if (eastnode_type == kExprNodeColon) { + goto viml_pexpr_parse_invalid_colon; + } else if (eastnode_lvl >= kEOpLvlTernaryValue) { + // Do nothing + } else if (eastnode_lvl >= kEOpLvlComma) { + can_be_ternary = false; + } else { + goto viml_pexpr_parse_invalid_colon; + } + if (i == kv_size(ast_stack) - 1) { + goto viml_pexpr_parse_invalid_colon; + } + } + if (is_subscript) { + assert(kv_size(ast_stack) > 1); + // Colon immediately following subscript start: it is empty subscript + // part like a[:2]. + if (want_node == kENodeValue + && (*kv_Z(ast_stack, 1))->type == kExprNodeSubscript) { + NEW_NODE_WITH_CUR_POS(*top_node_p, kExprNodeMissing); + (*top_node_p)->len = 0; + want_node = kENodeOperator; + } else { + ADD_VALUE_IF_MISSING(_(EXP_VAL_COLON)); + } + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeColon); + ADD_OP_NODE(cur_node); + HL_CUR_TOKEN(SubscriptColon); + } else { + goto viml_pexpr_parse_valid_colon; +viml_pexpr_parse_invalid_colon: + ERROR_FROM_TOKEN_AND_MSG( + cur_token, + _("E15: Colon outside of dictionary or ternary operator: %.*s")); +viml_pexpr_parse_valid_colon: + ADD_VALUE_IF_MISSING(_(EXP_VAL_COLON)); + if (is_ternary) { + HL_CUR_TOKEN(TernaryColon); + } else { + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeColon); + ADD_OP_NODE(cur_node); + HL_CUR_TOKEN(Colon); + } + } + want_node = kENodeValue; + break; + } +#undef EXP_VAL_COLON + case kExprLexBracket: { + if (cur_token.data.brc.closing) { + ExprASTNode **new_top_node_p = NULL; + // Always drop the topmost value: + // + // 1. When want_node != kENodeValue topmost item on stack is + // a *finished* left operand, which may as well be "{@a}" which + // needs not be finished again. + // 2. Otherwise it is pointing to NULL what nobody wants. + kv_drop(ast_stack, 1); + if (!kv_size(ast_stack)) { + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeListLiteral); + cur_node->len = 0; + if (want_node != kENodeValue) { + cur_node->children = *top_node_p; + } + *top_node_p = cur_node; + goto viml_pexpr_parse_bracket_closing_error; + } + if (want_node == kENodeValue) { + // It is OK to want value if + // + // 1. It is empty list literal, in which case top node will be + // ListLiteral. + // 2. It is list literal with trailing comma, in which case top node + // will be that comma. + // 3. It is subscript with colon, but without one of the values: + // e.g. "a[:]", "a[1:]", top node will be colon in this case. + if ((*kv_last(ast_stack))->type != kExprNodeListLiteral + && (*kv_last(ast_stack))->type != kExprNodeComma + && (*kv_last(ast_stack))->type != kExprNodeColon) { + ERROR_FROM_TOKEN_AND_MSG( + cur_token, + _("E15: Expected value, got closing bracket: %.*s")); + } + } else { + if (!kv_size(ast_stack)) { + new_top_node_p = top_node_p; + goto viml_pexpr_parse_bracket_closing_error; + } + } + do { + new_top_node_p = kv_pop(ast_stack); + } while (kv_size(ast_stack) + && (new_top_node_p == NULL + || ((*new_top_node_p)->type != kExprNodeListLiteral + && (*new_top_node_p)->type != kExprNodeSubscript))); + ExprASTNode *new_top_node = *new_top_node_p; + switch (new_top_node->type) { + case kExprNodeListLiteral: { + if (pt_is_assignment(cur_pt) && new_top_node->children == NULL) { + ERROR_FROM_TOKEN_AND_MSG( + cur_token, _("E475: Unable to assign to empty list: %.*s")); + } + HL_CUR_TOKEN(List); + break; + } + case kExprNodeSubscript: { + HL_CUR_TOKEN(SubscriptBracket); + break; + } + default: { +viml_pexpr_parse_bracket_closing_error: + assert(!kv_size(ast_stack)); + ERROR_FROM_TOKEN_AND_MSG( + cur_token, _("E15: Unexpected closing figure brace: %.*s")); + HL_CUR_TOKEN(List); + break; + } + } + kvi_push(ast_stack, new_top_node_p); + want_node = kENodeOperator; + if (kv_size(ast_stack) <= asgn_level) { + assert(kv_size(ast_stack) == asgn_level); + asgn_level = 0; + if (cur_pt == kEPTAssignment) { + assert(ast.err.msg); + } else if (cur_pt == kEPTExpr + && kv_size(pt_stack) > 1 + && pt_is_assignment(kv_Z(pt_stack, 1))) { + kv_drop(pt_stack, 1); + } + } + if (cur_pt == kEPTSingleAssignment && kv_size(ast_stack) == 1) { + kv_drop(pt_stack, 1); + } + } else { + if (want_node == kENodeValue) { + // Value means list literal or list assignment. + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeListLiteral); + *top_node_p = cur_node; + kvi_push(ast_stack, &cur_node->children); + want_node = kENodeValue; + if (cur_pt == kEPTAssignment) { + // Additional assignment parse type allows to easily forbid nested + // lists. + kvi_push(pt_stack, kEPTSingleAssignment); + } else if (cur_pt == kEPTSingleAssignment) { + ERROR_FROM_TOKEN_AND_MSG( + cur_token, + _("E475: Nested lists not allowed when assigning: %.*s")); + } + HL_CUR_TOKEN(List); + } else { + // Operator means subscript, also in assignment. But in assignment + // subscript may be pretty much any expression, so need to push + // kEPTExpr. + if (prev_token.type == kExprLexSpacing) { + OP_MISSING; + } + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeSubscript); + ADD_OP_NODE(cur_node); + HL_CUR_TOKEN(SubscriptBracket); + if (pt_is_assignment(cur_pt)) { + assert(want_node == kENodeValue); // Subtract 1 for NULL at top. + asgn_level = kv_size(ast_stack) - 1; + kvi_push(pt_stack, kEPTExpr); + } + } + } + break; + } + case kExprLexFigureBrace: { + if (cur_token.data.brc.closing) { + ExprASTNode **new_top_node_p = NULL; + // Always drop the topmost value: + // + // 1. When want_node != kENodeValue topmost item on stack is + // a *finished* left operand, which may as well be "{@a}" which + // needs not be finished again. + // 2. Otherwise it is pointing to NULL what nobody wants. + kv_drop(ast_stack, 1); + if (!kv_size(ast_stack)) { + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeUnknownFigure); + cur_node->data.fig.type_guesses.allow_lambda = false; + cur_node->data.fig.type_guesses.allow_dict = false; + cur_node->data.fig.type_guesses.allow_ident = false; + cur_node->len = 0; + if (want_node != kENodeValue) { + cur_node->children = *top_node_p; + } + *top_node_p = cur_node; + new_top_node_p = top_node_p; + goto viml_pexpr_parse_figure_brace_closing_error; + } + if (want_node == kENodeValue) { + if ((*kv_last(ast_stack))->type != kExprNodeUnknownFigure + && (*kv_last(ast_stack))->type != kExprNodeComma) { + // kv_last being UnknownFigure may occur for empty dictionary + // literal, while Comma is expected in case of non-empty one. + ERROR_FROM_TOKEN_AND_MSG( + cur_token, + _("E15: Expected value, got closing figure brace: %.*s")); + } + } else { + if (!kv_size(ast_stack)) { + new_top_node_p = top_node_p; + goto viml_pexpr_parse_figure_brace_closing_error; + } + } + do { + new_top_node_p = kv_pop(ast_stack); + } while (kv_size(ast_stack) + && (new_top_node_p == NULL + || ((*new_top_node_p)->type != kExprNodeUnknownFigure + && (*new_top_node_p)->type != kExprNodeDictLiteral + && ((*new_top_node_p)->type + != kExprNodeCurlyBracesIdentifier) + && (*new_top_node_p)->type != kExprNodeLambda))); + ExprASTNode *new_top_node = *new_top_node_p; + switch (new_top_node->type) { + case kExprNodeUnknownFigure: { + if (new_top_node->children == NULL) { + // No children of curly braces node indicates empty dictionary. + assert(want_node == kENodeValue); + assert(new_top_node->data.fig.type_guesses.allow_dict); + SELECT_FIGURE_BRACE_TYPE(new_top_node, DictLiteral, Dict); + HL_CUR_TOKEN(Dict); + } else if (new_top_node->data.fig.type_guesses.allow_ident) { + SELECT_FIGURE_BRACE_TYPE(new_top_node, CurlyBracesIdentifier, + Curly); + HL_CUR_TOKEN(Curly); + } else { + // If by this time type of the node has not already been + // guessed, but it definitely is not a curly braces name then + // it is invalid for sure. + ERROR_FROM_NODE_AND_MSG( + new_top_node, + _("E15: Don't know what figure brace means: %.*s")); + if (pstate->colors) { + // Will reset to NvimInvalidFigureBrace. + kv_A(*pstate->colors, + new_top_node->data.fig.opening_hl_idx).group = ( + HL(FigureBrace)); + } + HL_CUR_TOKEN(FigureBrace); + } + break; + } + case kExprNodeDictLiteral: { + HL_CUR_TOKEN(Dict); + break; + } + case kExprNodeCurlyBracesIdentifier: { + HL_CUR_TOKEN(Curly); + break; + } + case kExprNodeLambda: { + HL_CUR_TOKEN(Lambda); + break; + } + default: { +viml_pexpr_parse_figure_brace_closing_error: + assert(!kv_size(ast_stack)); + ERROR_FROM_TOKEN_AND_MSG( + cur_token, _("E15: Unexpected closing figure brace: %.*s")); + HL_CUR_TOKEN(FigureBrace); + break; + } + } + kvi_push(ast_stack, new_top_node_p); + want_node = kENodeOperator; + if (kv_size(ast_stack) <= asgn_level) { + assert(kv_size(ast_stack) == asgn_level); + if (cur_pt == kEPTExpr + && kv_size(pt_stack) > 1 + && pt_is_assignment(kv_Z(pt_stack, 1))) { + kv_drop(pt_stack, 1); + asgn_level = 0; + } + } + } else { + if (want_node == kENodeValue) { + HL_CUR_TOKEN(FigureBrace); + // Value: may be any of lambda, dictionary literal and curly braces + // name. + + // Though if we are in an assignment this may only be a curly braces + // name. + if (pt_is_assignment(cur_pt)) { + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeCurlyBracesIdentifier); + cur_node->data.fig.type_guesses.allow_lambda = false; + cur_node->data.fig.type_guesses.allow_dict = false; + cur_node->data.fig.type_guesses.allow_ident = true; + kvi_push(pt_stack, kEPTExpr); + } else { + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeUnknownFigure); + cur_node->data.fig.type_guesses.allow_lambda = true; + cur_node->data.fig.type_guesses.allow_dict = true; + cur_node->data.fig.type_guesses.allow_ident = true; + } + if (pstate->colors) { + cur_node->data.fig.opening_hl_idx = kv_size(*pstate->colors) - 1; + } + *top_node_p = cur_node; + kvi_push(ast_stack, &cur_node->children); + kvi_push(pt_stack, kEPTLambdaArguments); + lambda_node = cur_node; + } else { + ADD_IDENT( + do { + NEW_NODE_WITH_CUR_POS(cur_node, + kExprNodeCurlyBracesIdentifier); + cur_node->data.fig.opening_hl_idx = kv_size(*pstate->colors); + cur_node->data.fig.type_guesses.allow_lambda = false; + cur_node->data.fig.type_guesses.allow_dict = false; + cur_node->data.fig.type_guesses.allow_ident = true; + kvi_push(ast_stack, &cur_node->children); + if (pt_is_assignment(cur_pt)) { + kvi_push(pt_stack, kEPTExpr); + } + want_node = kENodeValue; + } while (0), + Curly); + } + if (pt_is_assignment(cur_pt) + && !pt_is_assignment(kv_last(pt_stack))) { + assert(want_node == kENodeValue); // Subtract 1 for NULL at top. + asgn_level = kv_size(ast_stack) - 1; + } + } + break; + } + case kExprLexArrow: { + if (cur_pt == kEPTLambdaArguments) { + kv_drop(pt_stack, 1); + assert(kv_size(pt_stack)); + if (want_node == kENodeValue) { + // Wanting value means trailing comma and NULL at the top of the + // stack. + kv_drop(ast_stack, 1); + } + assert(kv_size(ast_stack) >= 1); + while ((*kv_last(ast_stack))->type != kExprNodeLambda + && (*kv_last(ast_stack))->type != kExprNodeUnknownFigure) { + kv_drop(ast_stack, 1); + } + assert((*kv_last(ast_stack)) == lambda_node); + SELECT_FIGURE_BRACE_TYPE(lambda_node, Lambda, Lambda); + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeArrow); + if (lambda_node->children == NULL) { + assert(want_node == kENodeValue); + lambda_node->children = cur_node; + kvi_push(ast_stack, &lambda_node->children); + } else { + assert(lambda_node->children->next == NULL); + lambda_node->children->next = cur_node; + kvi_push(ast_stack, &lambda_node->children->next); + } + kvi_push(ast_stack, &cur_node->children); + lambda_node = NULL; + } else { + // Only first branch is valid. + ADD_VALUE_IF_MISSING(_("E15: Unexpected arrow: %.*s")); + ERROR_FROM_TOKEN_AND_MSG( + cur_token, _("E15: Arrow outside of lambda: %.*s")); + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeArrow); + ADD_OP_NODE(cur_node); + } + want_node = kENodeValue; + HL_CUR_TOKEN(Arrow); + break; + } + case kExprLexPlainIdentifier: { + const ExprVarScope scope = (cur_token.type == kExprLexInvalid + ? kExprVarScopeMissing + : cur_token.data.var.scope); + if (want_node == kENodeValue) { + want_node = kENodeOperator; + NEW_NODE_WITH_CUR_POS(cur_node, + (node_is_key + ? kExprNodePlainKey + : kExprNodePlainIdentifier)); + cur_node->data.var.scope = scope; + const size_t scope_shift = (scope == kExprVarScopeMissing ? 0 : 2); + cur_node->data.var.ident = (pline.data + cur_token.start.col + + scope_shift); + cur_node->data.var.ident_len = cur_token.len - scope_shift; + *top_node_p = cur_node; + if (scope_shift) { + assert(!node_is_key); + viml_parser_highlight(pstate, cur_token.start, 1, + HL(IdentifierScope)); + viml_parser_highlight(pstate, shifted_pos(cur_token.start, 1), 1, + HL(IdentifierScopeDelimiter)); + } + viml_parser_highlight(pstate, shifted_pos(cur_token.start, + scope_shift), + cur_token.len - scope_shift, + (node_is_key + ? HL(IdentifierKey) + : HL(IdentifierName))); + } else { + if (scope == kExprVarScopeMissing) { + ADD_IDENT( + do { + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodePlainIdentifier); + cur_node->data.var.scope = scope; + cur_node->data.var.ident = pline.data + cur_token.start.col; + cur_node->data.var.ident_len = cur_token.len; + want_node = kENodeOperator; + } while (0), + IdentifierName); + } else { + OP_MISSING; + } + } + break; + } + case kExprLexNumber: { + if (want_node != kENodeValue) { + OP_MISSING; + } + if (node_is_key) { + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodePlainKey); + cur_node->data.var.ident = pline.data + cur_token.start.col; + cur_node->data.var.ident_len = cur_token.len; + HL_CUR_TOKEN(IdentifierKey); + } else if (cur_token.data.num.is_float) { + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeFloat); + cur_node->data.flt.value = cur_token.data.num.val.floating; + HL_CUR_TOKEN(Float); + } else { + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeInteger); + cur_node->data.num.value = cur_token.data.num.val.integer; + const uint8_t prefix_length = base_to_prefix_length[ + cur_token.data.num.base]; + viml_parser_highlight(pstate, cur_token.start, prefix_length, + HL(NumberPrefix)); + viml_parser_highlight( + pstate, shifted_pos(cur_token.start, prefix_length), + cur_token.len - prefix_length, HL(Number)); + } + want_node = kENodeOperator; + *top_node_p = cur_node; + break; + } + case kExprLexDot: { + ADD_VALUE_IF_MISSING(_("E15: Unexpected dot: %.*s")); + if (prev_token.type == kExprLexSpacing) { + if (cur_pt == kEPTAssignment) { + ERROR_FROM_TOKEN_AND_MSG( + cur_token, _("E15: Cannot concatenate in assignments: %.*s")); + } + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeConcat); + HL_CUR_TOKEN(Concat); + } else { + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeConcatOrSubscript); + HL_CUR_TOKEN(ConcatOrSubscript); + } + ADD_OP_NODE(cur_node); + break; + } + case kExprLexParenthesis: { + if (cur_token.data.brc.closing) { + if (want_node == kENodeValue) { + if (kv_size(ast_stack) > 1) { + const ExprASTNode *const prev_top_node = *kv_Z(ast_stack, 1); + if (prev_top_node->type == kExprNodeCall) { + // Function call without arguments, this is not an error. + // But further code does not expect NULL nodes. + kv_drop(ast_stack, 1); + goto viml_pexpr_parse_no_paren_closing_error; + } + } + ERROR_FROM_TOKEN_AND_MSG( + cur_token, _("E15: Expected value, got parenthesis: %.*s")); + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeMissing); + cur_node->len = 0; + *top_node_p = cur_node; + } else { + // Always drop the topmost value: when want_node != kENodeValue + // topmost item on stack is a *finished* left operand, which may as + // well be "(@a)" which needs not be finished again. + kv_drop(ast_stack, 1); + } +viml_pexpr_parse_no_paren_closing_error: {} + ExprASTNode **new_top_node_p = NULL; + while (kv_size(ast_stack) + && (new_top_node_p == NULL + || ((*new_top_node_p)->type != kExprNodeNested + && (*new_top_node_p)->type != kExprNodeCall))) { + new_top_node_p = kv_pop(ast_stack); + } + if (new_top_node_p != NULL + && ((*new_top_node_p)->type == kExprNodeNested + || (*new_top_node_p)->type == kExprNodeCall)) { + if ((*new_top_node_p)->type == kExprNodeNested) { + HL_CUR_TOKEN(NestingParenthesis); + } else { + HL_CUR_TOKEN(CallingParenthesis); + } + } else { + // “Always drop the topmost value” branch has got rid of the single + // value stack had, so there is nothing known to enclose. Correct + // this. + if (new_top_node_p == NULL) { + new_top_node_p = top_node_p; + } + ERROR_FROM_TOKEN_AND_MSG( + cur_token, _("E15: Unexpected closing parenthesis: %.*s")); + HL_CUR_TOKEN(NestingParenthesis); + cur_node = NEW_NODE(kExprNodeNested); + cur_node->start = cur_token.start; + cur_node->len = 0; + // Unexpected closing parenthesis, assume that it was wanted to + // enclose everything in (). + cur_node->children = *new_top_node_p; + *new_top_node_p = cur_node; + assert(cur_node->next == NULL); + } + kvi_push(ast_stack, new_top_node_p); + want_node = kENodeOperator; + } else { + if (want_node == kENodeValue) { + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeNested); + *top_node_p = cur_node; + kvi_push(ast_stack, &cur_node->children); + HL_CUR_TOKEN(NestingParenthesis); + } else if (want_node == kENodeOperator) { + if (prev_token.type == kExprLexSpacing) { + // For some reason "function (args)" is a function call, but + // "(funcref) (args)" is not. AFAIR this somehow involves + // compatibility and Bram was commenting that this is + // intentionally inconsistent and he is not very happy with the + // situation himself. + if ((*top_node_p)->type != kExprNodePlainIdentifier + && (*top_node_p)->type != kExprNodeComplexIdentifier + && (*top_node_p)->type != kExprNodeCurlyBracesIdentifier) { + OP_MISSING; + } + } + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeCall); + ADD_OP_NODE(cur_node); + HL_CUR_TOKEN(CallingParenthesis); + } else { + // Currently it is impossible to reach this. + assert(false); + } + want_node = kENodeValue; + } + break; + } + case kExprLexQuestion: { + ADD_VALUE_IF_MISSING(_("E15: Expected value, got question mark: %.*s")); + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeTernary); + ADD_OP_NODE(cur_node); + HL_CUR_TOKEN(Ternary); + ExprASTNode *ter_val_node; + NEW_NODE_WITH_CUR_POS(ter_val_node, kExprNodeTernaryValue); + ter_val_node->data.ter.got_colon = false; + assert(cur_node->children != NULL); + assert(cur_node->children->next == NULL); + assert(kv_last(ast_stack) == &cur_node->children->next); + *kv_last(ast_stack) = ter_val_node; + kvi_push(ast_stack, &ter_val_node->children); + break; + } + case kExprLexDoubleQuotedString: + case kExprLexSingleQuotedString: { + const bool is_double = (tok_type == kExprLexDoubleQuotedString); + if (!cur_token.data.str.closed) { + // It is weird, but Vim has two identical errors messages with + // different error numbers: "E114: Missing quote" and + // "E115: Missing quote". + ERROR_FROM_TOKEN_AND_MSG( + cur_token, (is_double + ? _("E114: Missing double quote: %.*s") + : _("E115: Missing single quote: %.*s"))); + } + if (want_node == kENodeOperator) { + OP_MISSING; + } + NEW_NODE_WITH_CUR_POS( + cur_node, (is_double + ? kExprNodeDoubleQuotedString + : kExprNodeSingleQuotedString)); + *top_node_p = cur_node; + parse_quoted_string(pstate, cur_node, cur_token, ast_stack, is_invalid); + want_node = kENodeOperator; + break; + } + case kExprLexAssignment: { + if (cur_pt == kEPTAssignment) { + kv_drop(pt_stack, 1); + } else if (cur_pt == kEPTSingleAssignment) { + kv_drop(pt_stack, 2); + ERROR_FROM_TOKEN_AND_MSG( + cur_token, + _("E475: Expected closing bracket to end list assignment " + "lvalue: %.*s")); + } else { + ERROR_FROM_TOKEN_AND_MSG( + cur_token, _("E15: Misplaced assignment: %.*s")); + } + assert(kv_size(pt_stack)); + assert(kv_last(pt_stack) == kEPTExpr); + ADD_VALUE_IF_MISSING(_("E15: Unexpected assignment: %.*s")); + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeAssignment); + cur_node->data.ass.type = cur_token.data.ass.type; + switch (cur_token.data.ass.type) { +#define HL_ASGN(asgn, hl) \ + case kExprAsgn##asgn: { HL_CUR_TOKEN(hl); break; } + HL_ASGN(Plain, PlainAssignment) + HL_ASGN(Add, AssignmentWithAddition) + HL_ASGN(Subtract, AssignmentWithSubtraction) + HL_ASGN(Concat, AssignmentWithConcatenation) +#undef HL_ASGN + } + ADD_OP_NODE(cur_node); + break; + } + } +viml_pexpr_parse_cycle_end: + prev_token = cur_token; + highlighted_prev_spacing = false; + viml_parser_advance(pstate, cur_token.len); + } while (true); +viml_pexpr_parse_end: + assert(kv_size(pt_stack)); + assert(kv_size(ast_stack)); + if (want_node == kENodeValue + // Blacklist some parse type entries as their presence means better error + // message in the other branch. + && kv_last(pt_stack) != kEPTLambdaArguments) { + east_set_error(pstate, &ast.err, _("E15: Expected value, got EOC: %.*s"), + pstate->pos); + } else if (kv_size(ast_stack) != 1) { + // Something may be wrong, check whether it really is. + + // Pointer to ast.root must never be dropped, so “!= 1” is expected to be + // the same as “> 1”. + assert(kv_size(ast_stack)); + // Topmost stack item must be a *finished* value, so it must not be + // analyzed. E.g. it may contain an already finished nested expression. + kv_drop(ast_stack, 1); + while (ast.err.msg == NULL && kv_size(ast_stack)) { + const ExprASTNode *const cur_node = (*kv_pop(ast_stack)); + // This should only happen when want_node == kENodeValue. + assert(cur_node != NULL); + // TODO(ZyX-I): Rehighlight as invalid? + switch (cur_node->type) { + case kExprNodeOpMissing: + case kExprNodeMissing: { + // Error should’ve been already reported. + break; + } + case kExprNodeCall: { + east_set_error( + pstate, &ast.err, + _("E116: Missing closing parenthesis for function call: %.*s"), + cur_node->start); + break; + } + case kExprNodeNested: { + east_set_error( + pstate, &ast.err, + _("E110: Missing closing parenthesis for nested expression" + ": %.*s"), + cur_node->start); + break; + } + case kExprNodeListLiteral: { + // For whatever reason "[1" yields "E696: Missing comma in list" error + // in Vim while "[1," yields E697. + east_set_error( + pstate, &ast.err, + _("E697: Missing end of List ']': %.*s"), + cur_node->start); + break; + } + case kExprNodeDictLiteral: { + // Same problem like with list literal with E722 (missing comma) vs + // E723, but additionally just "{" yields only E15. + east_set_error( + pstate, &ast.err, + _("E723: Missing end of Dictionary '}': %.*s"), + cur_node->start); + break; + } + case kExprNodeUnknownFigure: { + east_set_error( + pstate, &ast.err, + _("E15: Missing closing figure brace: %.*s"), + cur_node->start); + break; + } + case kExprNodeLambda: { + east_set_error( + pstate, &ast.err, + _("E15: Missing closing figure brace for lambda: %.*s"), + cur_node->start); + break; + } + case kExprNodeCurlyBracesIdentifier: { + // Until trailing "}" it is impossible to distinguish curly braces + // identifier and dictionary, so it must not appear in the stack like + // this. + assert(false); + } + case kExprNodeInteger: + case kExprNodeFloat: + case kExprNodeSingleQuotedString: + case kExprNodeDoubleQuotedString: + case kExprNodeOption: + case kExprNodeEnvironment: + case kExprNodeRegister: + case kExprNodePlainIdentifier: + case kExprNodePlainKey: { + // These are plain values and not containers, for them it should only + // be possible to show up in the topmost stack element, but it was + // unconditionally popped at the start. + assert(false); + } + case kExprNodeComma: + case kExprNodeColon: + case kExprNodeArrow: { + // It is actually only valid inside something else, but everything + // where one of the above is valid requires to be closed and thus is + // to be caught later. + break; + } + case kExprNodeSubscript: + case kExprNodeConcatOrSubscript: + case kExprNodeComplexIdentifier: + case kExprNodeAssignment: + case kExprNodeMod: + case kExprNodeDivision: + case kExprNodeMultiplication: + case kExprNodeNot: + case kExprNodeAnd: + case kExprNodeOr: + case kExprNodeConcat: + case kExprNodeComparison: + case kExprNodeUnaryMinus: + case kExprNodeUnaryPlus: + case kExprNodeBinaryMinus: + case kExprNodeTernary: + case kExprNodeBinaryPlus: { + // It is OK to see these in the stack. + break; + } + case kExprNodeTernaryValue: { + if (!cur_node->data.ter.got_colon) { + // Actually Vim throws E109 in more cases. + east_set_error( + pstate, &ast.err, _("E109: Missing ':' after '?': %.*s"), + cur_node->start); + } + break; + } + } + } + } + kvi_destroy(ast_stack); + return ast; +} // NOLINT(readability/fn_size) + +#undef NEW_NODE +#undef HL diff --git a/src/nvim/viml/parser/expressions.h b/src/nvim/viml/parser/expressions.h new file mode 100644 index 0000000000..23e172da75 --- /dev/null +++ b/src/nvim/viml/parser/expressions.h @@ -0,0 +1,389 @@ +#ifndef NVIM_VIML_PARSER_EXPRESSIONS_H +#define NVIM_VIML_PARSER_EXPRESSIONS_H + +#include <stddef.h> +#include <stdint.h> +#include <stdbool.h> + +#include "nvim/types.h" +#include "nvim/viml/parser/parser.h" +#include "nvim/eval/typval.h" + +// Defines whether to ignore case: +// == kCCStrategyUseOption +// ==# kCCStrategyMatchCase +// ==? kCCStrategyIgnoreCase +typedef enum { + kCCStrategyUseOption = 0, // 0 for xcalloc + kCCStrategyMatchCase = '#', + kCCStrategyIgnoreCase = '?', +} ExprCaseCompareStrategy; + +/// Lexer token type +typedef enum { + kExprLexInvalid = 0, ///< Invalid token, indicaten an error. + kExprLexMissing, ///< Missing token, for use in parser. + kExprLexSpacing, ///< Spaces, tabs, newlines, etc. + kExprLexEOC, ///< End of command character: NL, |, just end of stream. + + kExprLexQuestion, ///< Question mark, for use in ternary. + kExprLexColon, ///< Colon, for use in ternary. + kExprLexOr, ///< Logical or operator. + kExprLexAnd, ///< Logical and operator. + kExprLexComparison, ///< One of the comparison operators. + kExprLexPlus, ///< Plus sign. + kExprLexMinus, ///< Minus sign. + kExprLexDot, ///< Dot: either concat or subscript, also part of the float. + kExprLexMultiplication, ///< Multiplication, division or modulo operator. + + kExprLexNot, ///< Not: !. + + kExprLexNumber, ///< Integer number literal, or part of a float. + kExprLexSingleQuotedString, ///< Single quoted string literal. + kExprLexDoubleQuotedString, ///< Double quoted string literal. + kExprLexOption, ///< &optionname option value. + kExprLexRegister, ///< @r register value. + kExprLexEnv, ///< Environment $variable value. + kExprLexPlainIdentifier, ///< Identifier without scope: `abc`, `foo#bar`. + + kExprLexBracket, ///< Bracket, either opening or closing. + kExprLexFigureBrace, ///< Figure brace, either opening or closing. + kExprLexParenthesis, ///< Parenthesis, either opening or closing. + kExprLexComma, ///< Comma. + kExprLexArrow, ///< Arrow, like from lambda expressions. + kExprLexAssignment, ///< Assignment: `=` or `{op}=`. + // XXX When modifying this enum you need to also modify eltkn_type_tab in + // expressions.c and tests and, possibly, viml_pexpr_repr_token. +} LexExprTokenType; + +typedef enum { + kExprCmpEqual, ///< Equality, unequality. + kExprCmpMatches, ///< Matches regex, not matches regex. + kExprCmpGreater, ///< `>` or `<=` + kExprCmpGreaterOrEqual, ///< `>=` or `<`. + kExprCmpIdentical, ///< `is` or `isnot` +} ExprComparisonType; + +/// All possible option scopes +typedef enum { + kExprOptScopeUnspecified = 0, + kExprOptScopeGlobal = 'g', + kExprOptScopeLocal = 'l', +} ExprOptScope; + +/// All possible assignment types: `=` and `{op}=`. +typedef enum { + kExprAsgnPlain = 0, ///< Plain assignment: `=`. + kExprAsgnAdd, ///< Assignment augmented with addition: `+=`. + kExprAsgnSubtract, ///< Assignment augmented with subtraction: `-=`. + kExprAsgnConcat, ///< Assignment augmented with concatenation: `.=`. +} ExprAssignmentType; + +#define EXPR_OPT_SCOPE_LIST \ + ((char[]){ kExprOptScopeGlobal, kExprOptScopeLocal }) + +/// All possible variable scopes +typedef enum { + kExprVarScopeMissing = 0, + kExprVarScopeScript = 's', + kExprVarScopeGlobal = 'g', + kExprVarScopeVim = 'v', + kExprVarScopeBuffer = 'b', + kExprVarScopeWindow = 'w', + kExprVarScopeTabpage = 't', + kExprVarScopeLocal = 'l', + kExprVarScopeArguments = 'a', +} ExprVarScope; + +#define EXPR_VAR_SCOPE_LIST \ + ((char[]) { \ + kExprVarScopeScript, kExprVarScopeGlobal, kExprVarScopeVim, \ + kExprVarScopeBuffer, kExprVarScopeWindow, kExprVarScopeTabpage, \ + kExprVarScopeLocal, kExprVarScopeBuffer, kExprVarScopeArguments, \ + }) + +/// Lexer token +typedef struct { + ParserPosition start; + size_t len; + LexExprTokenType type; + union { + struct { + ExprComparisonType type; ///< Comparison type. + ExprCaseCompareStrategy ccs; ///< Case comparison strategy. + bool inv; ///< True if comparison is to be inverted. + } cmp; ///< For kExprLexComparison. + + struct { + enum { + kExprLexMulMul, ///< Real multiplication. + kExprLexMulDiv, ///< Division. + kExprLexMulMod, ///< Modulo. + } type; ///< Multiplication type. + } mul; ///< For kExprLexMultiplication. + + struct { + bool closing; ///< True if bracket/etc is a closing one. + } brc; ///< For brackets/braces/parenthesis. + + struct { + int name; ///< Register name, may be -1 if name not present. + } reg; ///< For kExprLexRegister. + + struct { + bool closed; ///< True if quote was closed. + } str; ///< For kExprLexSingleQuotedString and kExprLexDoubleQuotedString. + + struct { + const char *name; ///< Option name start. + size_t len; ///< Option name length. + ExprOptScope scope; ///< Option scope: &l:, &g: or not specified. + } opt; ///< Option properties. + + struct { + ExprVarScope scope; ///< Scope character or 0 if not present. + bool autoload; ///< Has autoload characters. + } var; ///< For kExprLexPlainIdentifier + + struct { + LexExprTokenType type; ///< Suggested type for parsing incorrect code. + const char *msg; ///< Error message. + } err; ///< For kExprLexInvalid + + struct { + union { + float_T floating; + uvarnumber_T integer; + } val; ///< Number value. + uint8_t base; ///< Base: 2, 8, 10 or 16. + bool is_float; ///< True if number is a floating-point. + } num; ///< For kExprLexNumber + + struct { + ExprAssignmentType type; + } ass; ///< For kExprLexAssignment + } data; ///< Additional data, if needed. +} LexExprToken; + +typedef enum { + /// If set, “pointer” to the current byte in pstate will not be shifted + kELFlagPeek = (1 << 0), + /// Determines whether scope is allowed to come before the identifier + kELFlagForbidScope = (1 << 1), + /// Determines whether floating-point numbers are allowed + /// + /// I.e. whether dot is a decimal point separator or is not a part of + /// a number at all. + kELFlagAllowFloat = (1 << 2), + /// Determines whether `is` and `isnot` are seen as comparison operators + /// + /// If set they are supposed to be just regular identifiers. + kELFlagIsNotCmp = (1 << 3), + /// Determines whether EOC tokens are allowed + /// + /// If set then it will yield Invalid token with E15 in place of EOC one if + /// “EOC” is something like "|". It is fine with emitting EOC at the end of + /// string still, with or without this flag set. + kELFlagForbidEOC = (1 << 4), + // XXX Whenever you add a new flag, alter klee_assume() statement in + // viml_expressions_lexer.c. +} LexExprFlags; + +/// Expression AST node type +typedef enum { + kExprNodeMissing = 0, + kExprNodeOpMissing, + kExprNodeTernary, ///< Ternary operator. + kExprNodeTernaryValue, ///< Ternary operator, colon. + kExprNodeRegister, ///< Register. + kExprNodeSubscript, ///< Subscript. + kExprNodeListLiteral, ///< List literal. + kExprNodeUnaryPlus, + kExprNodeBinaryPlus, + kExprNodeNested, ///< Nested parenthesised expression. + kExprNodeCall, ///< Function call. + /// Plain identifier: simple variable/function name + /// + /// Looks like "string", "g:Foo", etc: consists from a single + /// kExprLexPlainIdentifier token. + kExprNodePlainIdentifier, + /// Plain dictionary key, for use with kExprNodeConcatOrSubscript + kExprNodePlainKey, + /// Complex identifier: variable/function name with curly braces + kExprNodeComplexIdentifier, + /// Figure brace expression which is not yet known + /// + /// May resolve to any of kExprNodeDictLiteral, kExprNodeLambda or + /// kExprNodeCurlyBracesIdentifier. + kExprNodeUnknownFigure, + kExprNodeLambda, ///< Lambda. + kExprNodeDictLiteral, ///< Dictionary literal. + kExprNodeCurlyBracesIdentifier, ///< Part of the curly braces name. + kExprNodeComma, ///< Comma “operator”. + kExprNodeColon, ///< Colon “operator”. + kExprNodeArrow, ///< Arrow “operator”. + kExprNodeComparison, ///< Various comparison operators. + /// Concat operator + /// + /// To be only used in cases when it is known for sure it is not a subscript. + kExprNodeConcat, + /// Concat or subscript operator + /// + /// For cases when it is not obvious whether expression is a concat or + /// a subscript. May only have either number or plain identifier as the second + /// child. To make it easier to avoid curly braces in place of + /// kExprNodePlainIdentifier node kExprNodePlainKey is used. + kExprNodeConcatOrSubscript, + kExprNodeInteger, ///< Integral number. + kExprNodeFloat, ///< Floating-point number. + kExprNodeSingleQuotedString, + kExprNodeDoubleQuotedString, + kExprNodeOr, + kExprNodeAnd, + kExprNodeUnaryMinus, + kExprNodeBinaryMinus, + kExprNodeNot, + kExprNodeMultiplication, + kExprNodeDivision, + kExprNodeMod, + kExprNodeOption, + kExprNodeEnvironment, + kExprNodeAssignment, + // XXX When modifying this list also modify east_node_type_tab both in parser + // and in tests, and you most likely will also have to alter list of + // highlight groups stored in highlight_init_cmdline variable. +} ExprASTNodeType; + +typedef struct expr_ast_node ExprASTNode; + +/// Structure representing one AST node +struct expr_ast_node { + ExprASTNodeType type; ///< Node type. + /// Node children: e.g. for 1 + 2 nodes 1 and 2 will be children of +. + ExprASTNode *children; + /// Next node: e.g. for 1 + 2 child nodes 1 and 2 are put into a single-linked + /// list: `(+)->children` references only node 1, node 2 is in + /// `(+)->children->next`. + ExprASTNode *next; + ParserPosition start; + size_t len; + union { + struct { + int name; ///< Register name, may be -1 if name not present. + } reg; ///< For kExprNodeRegister. + struct { + /// Which nodes UnknownFigure can’t possibly represent. + struct { + /// True if UnknownFigure may actually represent dictionary literal. + bool allow_dict; + /// True if UnknownFigure may actually represent lambda. + bool allow_lambda; + /// True if UnknownFigure may actually be part of curly braces name. + bool allow_ident; + } type_guesses; + /// Highlight chunk index, used for rehighlighting if needed + size_t opening_hl_idx; + } fig; ///< For kExprNodeUnknownFigure. + struct { + ExprVarScope scope; ///< Scope character or 0 if not present. + /// Actual identifier without scope. + /// + /// Points to inside parser reader state. + const char *ident; + size_t ident_len; ///< Actual identifier length. + } var; ///< For kExprNodePlainIdentifier and kExprNodePlainKey. + struct { + bool got_colon; ///< True if colon was seen. + } ter; ///< For kExprNodeTernaryValue. + struct { + ExprComparisonType type; ///< Comparison type. + ExprCaseCompareStrategy ccs; ///< Case comparison strategy. + bool inv; ///< True if comparison is to be inverted. + } cmp; ///< For kExprNodeComparison. + struct { + uvarnumber_T value; + } num; ///< For kExprNodeInteger. + struct { + float_T value; + } flt; ///< For kExprNodeFloat. + struct { + char *value; + size_t size; + } str; ///< For kExprNodeSingleQuotedString and + ///< kExprNodeDoubleQuotedString. + struct { + const char *ident; ///< Option name start. + size_t ident_len; ///< Option name length. + ExprOptScope scope; ///< Option scope: &l:, &g: or not specified. + } opt; ///< For kExprNodeOption. + struct { + const char *ident; ///< Environment variable name start. + size_t ident_len; ///< Environment variable name length. + } env; ///< For kExprNodeEnvironment. + struct { + ExprAssignmentType type; + } ass; ///< For kExprNodeAssignment + } data; +}; + +enum { + /// Allow multiple expressions in a row: e.g. for :echo + /// + /// Parser will still parse only one of them though. + kExprFlagsMulti = (1 << 0), + /// Allow NL, NUL and bar to be EOC + /// + /// When parsing expressions input by user bar is assumed to be a binary + /// operator and other two are spacings. + kExprFlagsDisallowEOC = (1 << 1), + /// Parse :let argument + /// + /// That mean that top level node must be an assignment and first nodes + /// belong to lvalues. + kExprFlagsParseLet = (1 << 2), + // XXX whenever you add a new flag, alter klee_assume() statement in + // viml_expressions_parser.c, nvim_parse_expression() flags parsing + // alongside with its documentation and flag sets in check_parsing() + // function in expressions parser functional and unit tests. +} ExprParserFlags; + +/// AST error definition +typedef struct { + /// Error message. Must contain a single printf format atom: %.*s. + const char *msg; + /// Error message argument: points to the location of the error. + const char *arg; + /// Message argument length: length till the end of string. + int arg_len; +} ExprASTError; + +/// Structure representing complety AST for one expression +typedef struct { + /// When AST is not correct this message will be printed. + /// + /// Uses `emsgf(msg, arg_len, arg);`, `msg` is assumed to contain only `%.*s`. + ExprASTError err; + /// Root node of the AST. + ExprASTNode *root; +} ExprAST; + +/// Array mapping ExprASTNodeType to maximum amount of children node may have +extern const uint8_t node_maxchildren[]; + +/// Array mapping ExprASTNodeType values to their stringified versions +extern const char *const east_node_type_tab[]; + +/// Array mapping ExprComparisonType values to their stringified versions +extern const char *const eltkn_cmp_type_tab[]; + +/// Array mapping ExprCaseCompareStrategy values to their stringified versions +extern const char *const ccs_tab[]; + +/// Array mapping ExprAssignmentType values to their stringified versions +extern const char *const expr_asgn_type_tab[]; + +#ifdef INCLUDE_GENERATED_DECLARATIONS +# include "viml/parser/expressions.h.generated.h" +#endif + +#endif // NVIM_VIML_PARSER_EXPRESSIONS_H diff --git a/src/nvim/viml/parser/parser.c b/src/nvim/viml/parser/parser.c new file mode 100644 index 0000000000..08d8846018 --- /dev/null +++ b/src/nvim/viml/parser/parser.c @@ -0,0 +1,13 @@ +#include "nvim/viml/parser/parser.h" + +#ifdef INCLUDE_GENERATED_DECLARATIONS +# include "viml/parser/parser.c.generated.h" +#endif + + +void parser_simple_get_line(void *cookie, ParserLine *ret_pline) +{ + ParserLine **plines_p = (ParserLine **)cookie; + *ret_pline = **plines_p; + (*plines_p)++; +} diff --git a/src/nvim/viml/parser/parser.h b/src/nvim/viml/parser/parser.h new file mode 100644 index 0000000000..7ac49709d8 --- /dev/null +++ b/src/nvim/viml/parser/parser.h @@ -0,0 +1,244 @@ +#ifndef NVIM_VIML_PARSER_PARSER_H +#define NVIM_VIML_PARSER_PARSER_H + +#include <stdbool.h> +#include <stddef.h> +#include <assert.h> + +#include "nvim/lib/kvec.h" +#include "nvim/func_attr.h" +#include "nvim/mbyte.h" +#include "nvim/memory.h" + +/// One parsed line +typedef struct { + const char *data; ///< Parsed line pointer + size_t size; ///< Parsed line size + bool allocated; ///< True if line may be freed. +} ParserLine; + +/// Line getter type for parser +/// +/// Line getter must return {NULL, 0} for EOF. +typedef void (*ParserLineGetter)(void *cookie, ParserLine *ret_pline); + +/// Parser position in the input +typedef struct { + size_t line; ///< Line index in ParserInputReader.lines. + size_t col; ///< Byte index in the line. +} ParserPosition; + +/// Parser state item. +typedef struct { + enum { + kPTopStateParsingCommand = 0, + kPTopStateParsingExpression, + } type; + union { + struct { + enum { + kExprUnknown = 0, + } type; + } expr; + } data; +} ParserStateItem; + +/// Structure defining input reader +typedef struct { + /// Function used to get next line. + ParserLineGetter get_line; + /// Data for get_line function. + void *cookie; + /// All lines obtained by get_line. + kvec_withinit_t(ParserLine, 4) lines; + /// Conversion, for :scriptencoding. + vimconv_T conv; +} ParserInputReader; + +/// Highlighted region definition +/// +/// Note: one chunk may highlight only one line. +typedef struct { + ParserPosition start; ///< Start of the highlight: line and column. + size_t end_col; ///< End column, points to the start of the next character. + const char *group; ///< Highlight group. +} ParserHighlightChunk; + +/// Highlighting defined by a parser +typedef kvec_withinit_t(ParserHighlightChunk, 16) ParserHighlight; + +/// Structure defining parser state +typedef struct { + /// Line reader. + ParserInputReader reader; + /// Position up to which input was parsed. + ParserPosition pos; + /// Parser state stack. + kvec_withinit_t(ParserStateItem, 16) stack; + /// Highlighting support. + ParserHighlight *colors; + /// True if line continuation can be used. + bool can_continuate; +} ParserState; + +static inline void viml_parser_init( + ParserState *const ret_pstate, + const ParserLineGetter get_line, void *const cookie, + ParserHighlight *const colors) + REAL_FATTR_ALWAYS_INLINE REAL_FATTR_NONNULL_ARG(1, 2); + +/// Initialize a new parser state instance +/// +/// @param[out] ret_pstate Parser state to initialize. +/// @param[in] get_line Line getter function. +/// @param[in] cookie Argument for the get_line function. +/// @param[in] colors Where to save highlighting. May be NULL if it is not +/// needed. +static inline void viml_parser_init( + ParserState *const ret_pstate, + const ParserLineGetter get_line, void *const cookie, + ParserHighlight *const colors) +{ + *ret_pstate = (ParserState) { + .reader = { + .get_line = get_line, + .cookie = cookie, + .conv = MBYTE_NONE_CONV, + }, + .pos = { 0, 0 }, + .colors = colors, + .can_continuate = false, + }; + kvi_init(ret_pstate->reader.lines); + kvi_init(ret_pstate->stack); +} + +static inline void viml_parser_destroy(ParserState *const pstate) + REAL_FATTR_NONNULL_ALL REAL_FATTR_ALWAYS_INLINE; + +/// Free all memory allocated by the parser on heap +/// +/// @param pstate Parser state to free. +static inline void viml_parser_destroy(ParserState *const pstate) +{ + for (size_t i = 0; i < kv_size(pstate->reader.lines); i++) { + ParserLine pline = kv_A(pstate->reader.lines, i); + if (pline.allocated) { + xfree((void *)pline.data); + } + } + kvi_destroy(pstate->reader.lines); + kvi_destroy(pstate->stack); +} + +static inline void viml_preader_get_line(ParserInputReader *const preader, + ParserLine *const ret_pline) + REAL_FATTR_NONNULL_ALL; + +/// Get one line from ParserInputReader +static inline void viml_preader_get_line(ParserInputReader *const preader, + ParserLine *const ret_pline) +{ + ParserLine pline; + preader->get_line(preader->cookie, &pline); + if (preader->conv.vc_type != CONV_NONE && pline.size) { + ParserLine cpline = { + .allocated = true, + .size = pline.size, + }; + cpline.data = (char *)string_convert(&preader->conv, + (char_u *)pline.data, + &cpline.size); + if (pline.allocated) { + xfree((void *)pline.data); + } + pline = cpline; + } + kvi_push(preader->lines, pline); + *ret_pline = pline; +} + +static inline bool viml_parser_get_remaining_line(ParserState *const pstate, + ParserLine *const ret_pline) + REAL_FATTR_ALWAYS_INLINE REAL_FATTR_WARN_UNUSED_RESULT REAL_FATTR_NONNULL_ALL; + +/// Get currently parsed line, shifted to pstate->pos.col +/// +/// @param pstate Parser state to operate on. +/// +/// @return True if there is a line, false in case of EOF. +static inline bool viml_parser_get_remaining_line(ParserState *const pstate, + ParserLine *const ret_pline) +{ + const size_t num_lines = kv_size(pstate->reader.lines); + if (pstate->pos.line == num_lines) { + viml_preader_get_line(&pstate->reader, ret_pline); + } else { + *ret_pline = kv_last(pstate->reader.lines); + } + assert(pstate->pos.line == kv_size(pstate->reader.lines) - 1); + if (ret_pline->data != NULL) { + ret_pline->data += pstate->pos.col; + ret_pline->size -= pstate->pos.col; + } + return ret_pline->data != NULL; +} + +static inline void viml_parser_advance(ParserState *const pstate, + const size_t len) + REAL_FATTR_ALWAYS_INLINE REAL_FATTR_NONNULL_ALL; + +/// Advance position by a given number of bytes +/// +/// At maximum advances to the next line. +/// +/// @param pstate Parser state to advance. +/// @param[in] len Number of bytes to advance. +static inline void viml_parser_advance(ParserState *const pstate, + const size_t len) +{ + assert(pstate->pos.line == kv_size(pstate->reader.lines) - 1); + const ParserLine pline = kv_last(pstate->reader.lines); + if (pstate->pos.col + len >= pline.size) { + pstate->pos.line++; + pstate->pos.col = 0; + } else { + pstate->pos.col += len; + } +} + +static inline void viml_parser_highlight(ParserState *const pstate, + const ParserPosition start, + const size_t end_col, + const char *const group) + REAL_FATTR_ALWAYS_INLINE REAL_FATTR_NONNULL_ALL; + +/// Record highlighting of some region of text +/// +/// @param pstate Parser state to work with. +/// @param[in] start Start position of the highlight. +/// @param[in] len Highlighting chunk length. +/// @param[in] group Highlight group. +static inline void viml_parser_highlight(ParserState *const pstate, + const ParserPosition start, + const size_t len, + const char *const group) +{ + if (pstate->colors == NULL || len == 0) { + return; + } + assert(kv_size(*pstate->colors) == 0 + || kv_Z(*pstate->colors, 0).start.line < start.line + || kv_Z(*pstate->colors, 0).end_col <= start.col); + kvi_push(*pstate->colors, ((ParserHighlightChunk) { + .start = start, + .end_col = start.col + len, + .group = group, + })); +} + +#ifdef INCLUDE_GENERATED_DECLARATIONS +# include "viml/parser/parser.h.generated.h" +#endif + +#endif // NVIM_VIML_PARSER_PARSER_H |