// This is an open source non-commercial project. Dear PVS-Studio, please check // it. PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com /// VimL expression parser // Planned incompatibilities (to be included into vim_diff.txt when this parser // will be an actual part of VimL evaluation process): // // 1. Expressions are first fully parsed and only then executed. This means // that while ":echo [system('touch abc')" will create file "abc" in Vim and // only then raise syntax error regarding missing comma in list in Neovim // trying to execute that will immediately raise syntax error regarding // missing list end without actually executing anything. // 2. Expressions are first fully parsed, without considering any runtime // information. This means things like that "d.a" does not change its // meaning depending on type of "d" (or whether Vim is currently executing or // skipping). For compatibility reasons the dot thus may either be “concat // or subscript” operator or just “concat” operator. // 3. Expressions parser is aware whether it is called for :echo or =. // This means that while "=1 | 2" is equivalent to "=1" // because "| 2" part is left to be treated as a command separator and then // ignored in Neovim it is an error. // 4. Expressions parser has generally better error reporting. But for // compatibility reasons most errors have error code E15 while error messages // are significantly different from Vim’s E15. Also some error codes were // retired because of being harder to emulate or because of them being // a result of differences in parsing process: e.g. with ":echo {a, b}" Vim // will attempt to parse expression as lambda, fail, check whether it is // a curly-braces-name, fail again, and evaluate that as a dictionary, giving // error regarding undefined variable "a" (or about missing colon). Neovim // will not try to evaluate anything here: comma right after an argument name // means that expression may not be anything, but lambda, so the resulting // error message will never be about missing variable or colon: it will be // about missing arrow (or a continuation of argument list). // 5. Failing to parse expression always gives exactly one error message: no // more stack of error messages like > // // :echo [1, // E697: Missing end of List ']': // E15: Invalid expression: [1, // // < , just exactly one E697 message. // 6. Some expressions involving calling parenthesis which are treated // separately by Vim even when not separated by spaces are treated as one // expression by Neovim: e.g. ":echo (1)(1)" will yield runtime error after // failing to call "1", while Vim will echo "1 1". Reasoning is the same: // type of what is in the first expression is generally not known when // parsing, so to have separate expressions like this separate them with // spaces. // 7. 'isident' no longer applies to environment variables, they always include // ASCII alphanumeric characters and underscore and nothing except this. #include #include #include #include #include "klib/kvec.h" #include "nvim/ascii.h" #include "nvim/assert.h" #include "nvim/charset.h" #include "nvim/eval/typval.h" #include "nvim/memory.h" #include "nvim/types.h" #include "nvim/vim.h" #include "nvim/viml/parser/expressions.h" #include "nvim/viml/parser/parser.h" typedef kvec_withinit_t(ExprASTNode **, 16) ExprASTStack; /// Which nodes may be wanted typedef enum { /// Operators: function call, subscripts, binary operators, … /// /// For unrestricted expressions. kENodeOperator, /// Values: literals, variables, nested expressions, unary operators. /// /// For unrestricted expressions as well, implies that top item in AST stack /// points to NULL. kENodeValue, } ExprASTWantedNode; /// Parse type: what is being parsed currently typedef enum { /// Parsing regular VimL expression kEPTExpr = 0, /// Parsing lambda arguments /// /// Just like parsing function arguments, but it is valid to be ended with an /// arrow only. kEPTLambdaArguments, /// Assignment: parsing for :let kEPTAssignment, /// Single assignment: used when lists are not allowed (i.e. when nesting) kEPTSingleAssignment, } ExprASTParseType; typedef kvec_withinit_t(ExprASTParseType, 4) ExprASTParseTypeStack; /// Operator priority level typedef enum { kEOpLvlInvalid = 0, kEOpLvlComplexIdentifier, kEOpLvlParens, kEOpLvlAssignment, kEOpLvlArrow, kEOpLvlComma, kEOpLvlColon, kEOpLvlTernaryValue, kEOpLvlTernary, kEOpLvlOr, kEOpLvlAnd, kEOpLvlComparison, kEOpLvlAddition, ///< Addition, subtraction and concatenation. kEOpLvlMultiplication, ///< Multiplication, division and modulo. kEOpLvlUnary, ///< Unary operations: not, minus, plus. kEOpLvlSubscript, ///< Subscripts. kEOpLvlValue, ///< Values: literals, variables, nested expressions, … } ExprOpLvl; /// Operator associativity typedef enum { kEOpAssNo= 'n', ///< Not associative / not applicable. kEOpAssLeft = 'l', ///< Left associativity. kEOpAssRight = 'r', ///< Right associativity. } ExprOpAssociativity; #ifdef INCLUDE_GENERATED_DECLARATIONS # include "viml/parser/expressions.c.generated.h" #endif /// Scale number by a given factor /// /// Used to apply exponent to a number. Idea taken from uClibc. /// /// @param[in] num Number to scale. Does not bother doing anything if it is /// zero. /// @param[in] base Base, should be 10 since non-decimal floating-point /// numbers are not supported. /// @param[in] exponent Exponent to scale by. /// @param[in] exponent_negative True if exponent is negative. static inline float_T scale_number(const float_T num, const uint8_t base, const uvarnumber_T exponent, const bool exponent_negative) FUNC_ATTR_ALWAYS_INLINE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_CONST { if (num == 0 || exponent == 0) { return num; } assert(base); uvarnumber_T exp = exponent; float_T p_base = (float_T)base; float_T ret = num; while (exp) { if (exp & 1) { if (exponent_negative) { ret /= p_base; } else { ret *= p_base; } } exp >>= 1; p_base *= p_base; } return ret; } /// Get next token for the VimL expression input /// /// @param pstate Parser state. /// @param[in] flags Flags, @see LexExprFlags. /// /// @return Next token. LexExprToken viml_pexpr_next_token(ParserState *const pstate, const int flags) FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL { LexExprToken ret = { .type = kExprLexInvalid, .start = pstate->pos, }; ParserLine pline; if (!viml_parser_get_remaining_line(pstate, &pline)) { ret.type = kExprLexEOC; return ret; } if (pline.size <= 0) { ret.len = 0; ret.type = kExprLexEOC; goto viml_pexpr_next_token_adv_return; } ret.len = 1; const uint8_t schar = (uint8_t)pline.data[0]; #define GET_CCS(ret, pline) \ do { \ if (ret.len < pline.size \ && strchr("?#", pline.data[ret.len]) != NULL) { \ ret.data.cmp.ccs = \ (ExprCaseCompareStrategy)pline.data[ret.len]; \ ret.len++; \ } else { \ ret.data.cmp.ccs = kCCStrategyUseOption; \ } \ } while (0) switch (schar) { // Paired brackets. #define BRACKET(typ, opning, clsing) \ case opning: \ case clsing: { \ ret.type = typ; \ ret.data.brc.closing = (schar == clsing); \ break; \ } BRACKET(kExprLexParenthesis, '(', ')') BRACKET(kExprLexBracket, '[', ']') BRACKET(kExprLexFigureBrace, '{', '}') #undef BRACKET // Single character tokens without data. #define CHAR(typ, ch) \ case ch: { \ ret.type = typ; \ break; \ } CHAR(kExprLexQuestion, '?') CHAR(kExprLexColon, ':') CHAR(kExprLexComma, ',') #undef CHAR // Multiplication/division/modulo. #define MUL(mul_type, ch) \ case ch: { \ ret.type = kExprLexMultiplication; \ ret.data.mul.type = mul_type; \ break; \ } MUL(kExprLexMulMul, '*') MUL(kExprLexMulDiv, '/') MUL(kExprLexMulMod, '%') #undef MUL #define CHARREG(typ, cond) \ do { \ ret.type = typ; \ for (; (ret.len < pline.size \ && cond(pline.data[ret.len])) \ ; ret.len++) { \ } \ } while (0) // Whitespace. case ' ': case TAB: CHARREG(kExprLexSpacing, ascii_iswhite); break; // Control character, except for NUL, NL and TAB. case Ctrl_A: case Ctrl_B: case Ctrl_C: case Ctrl_D: case Ctrl_E: case Ctrl_F: case Ctrl_G: case Ctrl_H: case Ctrl_K: case Ctrl_L: case Ctrl_M: case Ctrl_N: case Ctrl_O: case Ctrl_P: case Ctrl_Q: case Ctrl_R: case Ctrl_S: case Ctrl_T: case Ctrl_U: case Ctrl_V: case Ctrl_W: case Ctrl_X: case Ctrl_Y: case Ctrl_Z: #define ISCTRL(schar) (schar < ' ') CHARREG(kExprLexInvalid, ISCTRL); ret.data.err.type = kExprLexSpacing; ret.data.err.msg = _("E15: Invalid control character present in input: %.*s"); break; #undef ISCTRL // Number. case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': { ret.data.num.is_float = false; ret.data.num.base = 10; size_t frac_start = 0; size_t exp_start = 0; size_t frac_end = 0; bool exp_negative = false; CHARREG(kExprLexNumber, ascii_isdigit); if (flags & kELFlagAllowFloat) { const LexExprToken non_float_ret = ret; if (pline.size > ret.len + 1 && pline.data[ret.len] == '.' && ascii_isdigit(pline.data[ret.len + 1])) { ret.len++; frac_start = ret.len; frac_end = ret.len; ret.data.num.is_float = true; for (; ret.len < pline.size && ascii_isdigit(pline.data[ret.len]) ; ret.len++) { // A small optimization: trailing zeroes in fractional part do not // add anything to significand, so it is useless to include them in // frac_end. if (pline.data[ret.len] != '0') { frac_end = ret.len + 1; } } if (pline.size > ret.len + 1 && (pline.data[ret.len] == 'e' || pline.data[ret.len] == 'E') && ((pline.size > ret.len + 2 && (pline.data[ret.len + 1] == '+' || pline.data[ret.len + 1] == '-') && ascii_isdigit(pline.data[ret.len + 2])) || ascii_isdigit(pline.data[ret.len + 1]))) { ret.len++; if (pline.data[ret.len] == '+' || (exp_negative = (pline.data[ret.len] == '-'))) { ret.len++; } exp_start = ret.len; CHARREG(kExprLexNumber, ascii_isdigit); } } if (pline.size > ret.len && (pline.data[ret.len] == '.' || ASCII_ISALPHA(pline.data[ret.len]))) { ret = non_float_ret; } } // TODO(ZyX-I): detect overflows if (ret.data.num.is_float) { // Vim used to use string2float here which in turn uses strtod(). There // are two problems with this approach: // 1. strtod() is locale-dependent. Not sure how it is worked around so // that I do not see relevant bugs, but it still does not look like // a good idea. // 2. strtod() does not accept length argument. // // The below variant of parsing floats was recognized as acceptable // because it is basically how uClibc does the thing: it generates // a number ignoring decimal point (but recording its position), then // uses recorded position to scale number down when processing exponent. float_T significand_part = 0; uvarnumber_T exp_part = 0; const size_t frac_size = (size_t)(frac_end - frac_start); for (size_t i = 0; i < frac_end; i++) { if (i == frac_start - 1) { continue; } significand_part = significand_part * 10 + (pline.data[i] - '0'); } if (exp_start) { vim_str2nr(pline.data + exp_start, NULL, NULL, 0, NULL, &exp_part, (int)(ret.len - exp_start), false); } if (exp_negative) { exp_part += frac_size; } else { if (exp_part < frac_size) { exp_negative = true; exp_part = frac_size - exp_part; } else { exp_part -= frac_size; } } ret.data.num.val.floating = scale_number(significand_part, 10, exp_part, exp_negative); } else { int len; int prep; vim_str2nr(pline.data, &prep, &len, STR2NR_ALL, NULL, &ret.data.num.val.integer, (int)pline.size, false); ret.len = (size_t)len; const uint8_t bases[] = { [0] = 10, ['0'] = 8, ['x'] = 16, ['X'] = 16, ['b'] = 2, ['B'] = 2, }; ret.data.num.base = bases[prep]; } break; } #define ISWORD_OR_AUTOLOAD(x) \ (ascii_isident(x) || (x) == AUTOLOAD_CHAR) // Environment variable. case '$': CHARREG(kExprLexEnv, ascii_isident); break; // Normal variable/function name. case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z': case '_': ret.data.var.scope = 0; ret.data.var.autoload = false; CHARREG(kExprLexPlainIdentifier, ascii_isident); // "is" and "isnot" operators. if (!(flags & kELFlagIsNotCmp) && ((ret.len == 2 && memcmp(pline.data, "is", 2) == 0) || (ret.len == 5 && memcmp(pline.data, "isnot", 5) == 0))) { ret.type = kExprLexComparison; ret.data.cmp.type = kExprCmpIdentical; ret.data.cmp.inv = (ret.len == 5); GET_CCS(ret, pline); // Scope: `s:`, etc. } else if (ret.len == 1 && pline.size > 1 && memchr(EXPR_VAR_SCOPE_LIST, schar, sizeof(EXPR_VAR_SCOPE_LIST)) != NULL && pline.data[ret.len] == ':' && !(flags & kELFlagForbidScope)) { ret.len++; ret.data.var.scope = (ExprVarScope)schar; CHARREG(kExprLexPlainIdentifier, ISWORD_OR_AUTOLOAD); ret.data.var.autoload = ( memchr(pline.data + 2, AUTOLOAD_CHAR, ret.len - 2) != NULL); // Previous CHARREG stopped at autoload character in order to make it // possible to detect `is#`. Continue now with autoload characters // included. // // Warning: there is ambiguity for the lexer: `is#Foo(1)` is a call of // function `is#Foo()`, `1is#Foo(1)` is a comparison `1 is# Foo(1)`. This // needs to be resolved on the higher level where context is available. } else if (pline.size > ret.len && pline.data[ret.len] == AUTOLOAD_CHAR) { ret.data.var.autoload = true; CHARREG(kExprLexPlainIdentifier, ISWORD_OR_AUTOLOAD); } break; #undef ISWORD_OR_AUTOLOAD #undef CHARREG // Option. case '&': { #define OPTNAMEMISS(ret) \ do { \ ret.type = kExprLexInvalid; \ ret.data.err.type = kExprLexOption; \ ret.data.err.msg = _("E112: Option name missing: %.*s"); \ } while (0) if (pline.size > 1 && pline.data[1] == '&') { ret.type = kExprLexAnd; ret.len++; break; } if (pline.size == 1 || !ASCII_ISALPHA(pline.data[1])) { OPTNAMEMISS(ret); break; } ret.type = kExprLexOption; if (pline.size > 2 && pline.data[2] == ':' && memchr(EXPR_OPT_SCOPE_LIST, pline.data[1], sizeof(EXPR_OPT_SCOPE_LIST)) != NULL) { ret.len += 2; ret.data.opt.scope = (ExprOptScope)pline.data[1]; ret.data.opt.name = pline.data + 3; } else { ret.data.opt.scope = kExprOptScopeUnspecified; ret.data.opt.name = pline.data + 1; } const char *p = ret.data.opt.name; const char *const e = pline.data + pline.size; if (e - p >= 4 && p[0] == 't' && p[1] == '_') { ret.data.opt.len = 4; ret.len += 4; } else { for (; p < e && ASCII_ISALPHA(*p); p++) {} ret.data.opt.len = (size_t)(p - ret.data.opt.name); if (ret.data.opt.len == 0) { OPTNAMEMISS(ret); } else { ret.len += ret.data.opt.len; } } break; #undef OPTNAMEMISS } // Register. case '@': ret.type = kExprLexRegister; if (pline.size > 1) { ret.len++; ret.data.reg.name = (uint8_t)pline.data[1]; } else { ret.data.reg.name = -1; } break; // Single quoted string. case '\'': ret.type = kExprLexSingleQuotedString; ret.data.str.closed = false; for (; ret.len < pline.size && !ret.data.str.closed; ret.len++) { if (pline.data[ret.len] == '\'') { if (ret.len + 1 < pline.size && pline.data[ret.len + 1] == '\'') { ret.len++; } else { ret.data.str.closed = true; } } } break; // Double quoted string. case '"': ret.type = kExprLexDoubleQuotedString; ret.data.str.closed = false; for (; ret.len < pline.size && !ret.data.str.closed; ret.len++) { if (pline.data[ret.len] == '\\') { if (ret.len + 1 < pline.size) { ret.len++; } } else if (pline.data[ret.len] == '"') { ret.data.str.closed = true; } } break; // Unary not, (un)equality and regex (not) match comparison operators. case '!': case '=': if (pline.size == 1) { ret.type = (schar == '!' ? kExprLexNot : kExprLexAssignment); ret.data.ass.type = kExprAsgnPlain; break; } ret.type = kExprLexComparison; ret.data.cmp.inv = (schar == '!'); if (pline.data[1] == '=') { ret.data.cmp.type = kExprCmpEqual; ret.len++; } else if (pline.data[1] == '~') { ret.data.cmp.type = kExprCmpMatches; ret.len++; } else if (schar == '!') { ret.type = kExprLexNot; } else { ret.type = kExprLexAssignment; ret.data.ass.type = kExprAsgnPlain; } GET_CCS(ret, pline); break; // Less/greater [or equal to] comparison operators. case '>': case '<': { ret.type = kExprLexComparison; const bool haseqsign = (pline.size > 1 && pline.data[1] == '='); if (haseqsign) { ret.len++; } GET_CCS(ret, pline); ret.data.cmp.inv = (schar == '<'); ret.data.cmp.type = ((ret.data.cmp.inv ^ haseqsign) ? kExprCmpGreaterOrEqual : kExprCmpGreater); break; } // Minus sign, arrow from lambdas or augmented assignment. case '-': { if (pline.size > 1 && pline.data[1] == '>') { ret.len++; ret.type = kExprLexArrow; } else if (pline.size > 1 && pline.data[1] == '=') { ret.len++; ret.type = kExprLexAssignment; ret.data.ass.type = kExprAsgnSubtract; } else { ret.type = kExprLexMinus; } break; } // Sign or augmented assignment. #define CHAR_OR_ASSIGN(ch, ch_type, ass_type) \ case ch: { \ if (pline.size > 1 && pline.data[1] == '=') { \ ret.len++; \ ret.type = kExprLexAssignment; \ ret.data.ass.type = ass_type; \ } else { \ ret.type = ch_type; \ } \ break; \ } CHAR_OR_ASSIGN('+', kExprLexPlus, kExprAsgnAdd) CHAR_OR_ASSIGN('.', kExprLexDot, kExprAsgnConcat) #undef CHAR_OR_ASSIGN // Expression end because Ex command ended. case NUL: case NL: if (flags & kELFlagForbidEOC) { ret.type = kExprLexInvalid; ret.data.err.msg = _("E15: Unexpected EOC character: %.*s"); ret.data.err.type = kExprLexSpacing; } else { ret.type = kExprLexEOC; } break; case '|': if (pline.size >= 2 && pline.data[ret.len] == '|') { // "||" is or. ret.len++; ret.type = kExprLexOr; } else if (flags & kELFlagForbidEOC) { // Note: `=1 | 2` actually yields 1 in Vim without any // errors. This will be changed here. ret.type = kExprLexInvalid; ret.data.err.msg = _("E15: Unexpected EOC character: %.*s"); ret.data.err.type = kExprLexOr; } else { ret.type = kExprLexEOC; } break; // Everything else is not valid. default: ret.len = (size_t)utfc_ptr2len_len(pline.data, (int)pline.size); ret.type = kExprLexInvalid; ret.data.err.type = kExprLexPlainIdentifier; ret.data.err.msg = _("E15: Unidentified character: %.*s"); break; } #undef GET_CCS viml_pexpr_next_token_adv_return: if (!(flags & kELFlagPeek)) { viml_parser_advance(pstate, ret.len); } return ret; } static const char *const eltkn_type_tab[] = { [kExprLexInvalid] = "Invalid", [kExprLexMissing] = "Missing", [kExprLexSpacing] = "Spacing", [kExprLexEOC] = "EOC", [kExprLexQuestion] = "Question", [kExprLexColon] = "Colon", [kExprLexOr] = "Or", [kExprLexAnd] = "And", [kExprLexComparison] = "Comparison", [kExprLexPlus] = "Plus", [kExprLexMinus] = "Minus", [kExprLexDot] = "Dot", [kExprLexMultiplication] = "Multiplication", [kExprLexNot] = "Not", [kExprLexNumber] = "Number", [kExprLexSingleQuotedString] = "SingleQuotedString", [kExprLexDoubleQuotedString] = "DoubleQuotedString", [kExprLexOption] = "Option", [kExprLexRegister] = "Register", [kExprLexEnv] = "Env", [kExprLexPlainIdentifier] = "PlainIdentifier", [kExprLexBracket] = "Bracket", [kExprLexFigureBrace] = "FigureBrace", [kExprLexParenthesis] = "Parenthesis", [kExprLexComma] = "Comma", [kExprLexArrow] = "Arrow", [kExprLexAssignment] = "Assignment", }; const char *const eltkn_cmp_type_tab[] = { [kExprCmpEqual] = "Equal", [kExprCmpMatches] = "Matches", [kExprCmpGreater] = "Greater", [kExprCmpGreaterOrEqual] = "GreaterOrEqual", [kExprCmpIdentical] = "Identical", }; const char *const expr_asgn_type_tab[] = { [kExprAsgnPlain] = "Plain", [kExprAsgnAdd] = "Add", [kExprAsgnSubtract] = "Subtract", [kExprAsgnConcat] = "Concat", }; const char *const ccs_tab[] = { [kCCStrategyUseOption] = "UseOption", [kCCStrategyMatchCase] = "MatchCase", [kCCStrategyIgnoreCase] = "IgnoreCase", }; static const char *const eltkn_mul_type_tab[] = { [kExprLexMulMul] = "Mul", [kExprLexMulDiv] = "Div", [kExprLexMulMod] = "Mod", }; static const char *const eltkn_opt_scope_tab[] = { [kExprOptScopeUnspecified] = "Unspecified", [kExprOptScopeGlobal] = "Global", [kExprOptScopeLocal] = "Local", }; /// Represent token as a string /// /// Intended for testing and debugging purposes. /// /// @param[in] pstate Parser state, needed to get token string from it. May be /// NULL, in which case in place of obtaining part of the /// string represented by token only token length is /// returned. /// @param[in] token Token to represent. /// @param[out] ret_size Return string size, for cases like NULs inside /// a string. May be NULL. /// /// @return Token represented in a string form, in a static buffer (overwritten /// on each call). const char *viml_pexpr_repr_token(const ParserState *const pstate, const LexExprToken token, size_t *const ret_size) FUNC_ATTR_WARN_UNUSED_RESULT { static char ret[1024]; char *p = ret; const char *const e = &ret[1024] - 1; #define ADDSTR(...) \ do { \ p += snprintf(p, (size_t)(sizeof(ret) - (size_t)(p - ret)), __VA_ARGS__); \ if (p >= e) { \ goto viml_pexpr_repr_token_end; \ } \ } while (0) ADDSTR("%zu:%zu:%s", token.start.line, token.start.col, eltkn_type_tab[token.type]); switch (token.type) { #define TKNARGS(tkn_type, ...) \ case tkn_type: { \ ADDSTR(__VA_ARGS__); \ break; \ } TKNARGS(kExprLexComparison, "(type=%s,ccs=%s,inv=%i)", eltkn_cmp_type_tab[token.data.cmp.type], ccs_tab[token.data.cmp.ccs], (int)token.data.cmp.inv) TKNARGS(kExprLexMultiplication, "(type=%s)", eltkn_mul_type_tab[token.data.mul.type]) TKNARGS(kExprLexAssignment, "(type=%s)", expr_asgn_type_tab[token.data.ass.type]) TKNARGS(kExprLexRegister, "(name=%s)", intchar2str(token.data.reg.name)) case kExprLexDoubleQuotedString: TKNARGS(kExprLexSingleQuotedString, "(closed=%i)", (int)token.data.str.closed) TKNARGS(kExprLexOption, "(scope=%s,name=%.*s)", eltkn_opt_scope_tab[token.data.opt.scope], (int)token.data.opt.len, token.data.opt.name) TKNARGS(kExprLexPlainIdentifier, "(scope=%s,autoload=%i)", intchar2str((int)token.data.var.scope), (int)token.data.var.autoload) TKNARGS(kExprLexNumber, "(is_float=%i,base=%i,val=%lg)", (int)token.data.num.is_float, (int)token.data.num.base, (double)(token.data.num.is_float ? (double)token.data.num.val.floating : (double)token.data.num.val.integer)) TKNARGS(kExprLexInvalid, "(msg=%s)", token.data.err.msg) default: // No additional arguments. break; #undef TKNARGS } if (pstate == NULL) { ADDSTR("::%zu", token.len); } else { *p++ = ':'; memmove(p, &pstate->reader.lines.items[token.start.line].data[token.start.col], token.len); p += token.len; *p = NUL; } #undef ADDSTR viml_pexpr_repr_token_end: if (ret_size != NULL) { *ret_size = (size_t)(p - ret); } return ret; } const char *const east_node_type_tab[] = { [kExprNodeMissing] = "Missing", [kExprNodeOpMissing] = "OpMissing", [kExprNodeTernary] = "Ternary", [kExprNodeTernaryValue] = "TernaryValue", [kExprNodeRegister] = "Register", [kExprNodeSubscript] = "Subscript", [kExprNodeListLiteral] = "ListLiteral", [kExprNodeUnaryPlus] = "UnaryPlus", [kExprNodeBinaryPlus] = "BinaryPlus", [kExprNodeNested] = "Nested", [kExprNodeCall] = "Call", [kExprNodePlainIdentifier] = "PlainIdentifier", [kExprNodePlainKey] = "PlainKey", [kExprNodeComplexIdentifier] = "ComplexIdentifier", [kExprNodeUnknownFigure] = "UnknownFigure", [kExprNodeLambda] = "Lambda", [kExprNodeDictLiteral] = "DictLiteral", [kExprNodeCurlyBracesIdentifier] = "CurlyBracesIdentifier", [kExprNodeComma] = "Comma", [kExprNodeColon] = "Colon", [kExprNodeArrow] = "Arrow", [kExprNodeComparison] = "Comparison", [kExprNodeConcat] = "Concat", [kExprNodeConcatOrSubscript] = "ConcatOrSubscript", [kExprNodeInteger] = "Integer", [kExprNodeFloat] = "Float", [kExprNodeSingleQuotedString] = "SingleQuotedString", [kExprNodeDoubleQuotedString] = "DoubleQuotedString", [kExprNodeOr] = "Or", [kExprNodeAnd] = "And", [kExprNodeUnaryMinus] = "UnaryMinus", [kExprNodeBinaryMinus] = "BinaryMinus", [kExprNodeNot] = "Not", [kExprNodeMultiplication] = "Multiplication", [kExprNodeDivision] = "Division", [kExprNodeMod] = "Mod", [kExprNodeOption] = "Option", [kExprNodeEnvironment] = "Environment", [kExprNodeAssignment] = "Assignment", }; /// Represent `int` character as a string /// /// Converts /// - ASCII digits into '{digit}' /// - ASCII printable characters into a single-character strings /// - everything else to numbers. /// /// @param[in] ch Character to convert. /// /// @return Converted string, stored in a static buffer (overridden after each /// call). static const char *intchar2str(const int ch) FUNC_ATTR_WARN_UNUSED_RESULT { static char buf[sizeof(int) * 3 + 1]; if (' ' <= ch && ch < 0x7f) { if (ascii_isdigit(ch)) { buf[0] = '\''; buf[1] = (char)ch; buf[2] = '\''; buf[3] = NUL; } else { buf[0] = (char)ch; buf[1] = NUL; } } else { snprintf(buf, sizeof(buf), "%i", ch); } return buf; } #ifdef UNIT_TESTING # include REAL_FATTR_UNUSED static inline void viml_pexpr_debug_print_ast_node(const ExprASTNode *const *const eastnode_p, const char *const prefix) { if (*eastnode_p == NULL) { fprintf(stderr, "%s %p : NULL\n", prefix, (void *)eastnode_p); } else { fprintf(stderr, "%s %p : %p : %s : %zu:%zu:%zu\n", prefix, (void *)eastnode_p, (void *)(*eastnode_p), east_node_type_tab[(*eastnode_p)->type], (*eastnode_p)->start.line, (*eastnode_p)->start.col, (*eastnode_p)->len); } } REAL_FATTR_UNUSED static inline void viml_pexpr_debug_print_ast_stack(const ExprASTStack *const ast_stack, const char *const msg) FUNC_ATTR_NONNULL_ALL FUNC_ATTR_ALWAYS_INLINE { fprintf(stderr, "\n%sstack: %zu:\n", msg, kv_size(*ast_stack)); for (size_t i = 0; i < kv_size(*ast_stack); i++) { viml_pexpr_debug_print_ast_node((const ExprASTNode *const *)kv_A(*ast_stack, i), "-"); } } REAL_FATTR_UNUSED static inline void viml_pexpr_debug_print_token(const ParserState *const pstate, const LexExprToken token) FUNC_ATTR_ALWAYS_INLINE { fprintf(stderr, "\ntkn: %s\n", viml_pexpr_repr_token(pstate, token, NULL)); } # define PSTACK(msg) \ viml_pexpr_debug_print_ast_stack(&ast_stack, #msg) # define PSTACK_P(msg) \ viml_pexpr_debug_print_ast_stack(ast_stack, #msg) # define PNODE_P(eastnode_p, msg) \ viml_pexpr_debug_print_ast_node((const ExprASTNode *const *)eastnode_p, \ (#msg)) # define PTOKEN(tkn) \ viml_pexpr_debug_print_token(pstate, tkn) #endif const uint8_t node_maxchildren[] = { [kExprNodeMissing] = 0, [kExprNodeOpMissing] = 2, [kExprNodeTernary] = 2, [kExprNodeTernaryValue] = 2, [kExprNodeRegister] = 0, [kExprNodeSubscript] = 2, [kExprNodeListLiteral] = 1, [kExprNodeUnaryPlus] = 1, [kExprNodeBinaryPlus] = 2, [kExprNodeNested] = 1, [kExprNodeCall] = 2, [kExprNodePlainIdentifier] = 0, [kExprNodePlainKey] = 0, [kExprNodeComplexIdentifier] = 2, [kExprNodeUnknownFigure] = 1, [kExprNodeLambda] = 2, [kExprNodeDictLiteral] = 1, [kExprNodeCurlyBracesIdentifier] = 1, [kExprNodeComma] = 2, [kExprNodeColon] = 2, [kExprNodeArrow] = 2, [kExprNodeComparison] = 2, [kExprNodeConcat] = 2, [kExprNodeConcatOrSubscript] = 2, [kExprNodeInteger] = 0, [kExprNodeFloat] = 0, [kExprNodeSingleQuotedString] = 0, [kExprNodeDoubleQuotedString] = 0, [kExprNodeOr] = 2, [kExprNodeAnd] = 2, [kExprNodeUnaryMinus] = 1, [kExprNodeBinaryMinus] = 2, [kExprNodeNot] = 1, [kExprNodeMultiplication] = 2, [kExprNodeDivision] = 2, [kExprNodeMod] = 2, [kExprNodeOption] = 0, [kExprNodeEnvironment] = 0, [kExprNodeAssignment] = 2, }; /// Free memory occupied by AST /// /// @param ast AST stack to free. void viml_pexpr_free_ast(ExprAST ast) { ExprASTStack ast_stack; kvi_init(ast_stack); kvi_push(ast_stack, &ast.root); while (kv_size(ast_stack)) { ExprASTNode **const cur_node = kv_last(ast_stack); #ifndef NDEBUG // Explicitly check for AST recursiveness. for (size_t i = 0; i < kv_size(ast_stack) - 1; i++) { assert(*kv_A(ast_stack, i) != *cur_node); } #endif if (*cur_node == NULL) { assert(kv_size(ast_stack) == 1); kv_drop(ast_stack, 1); } else if ((*cur_node)->children != NULL) { #ifndef NDEBUG const uint8_t maxchildren = node_maxchildren[(*cur_node)->type]; assert(maxchildren > 0); assert(maxchildren <= 2); assert(maxchildren == 1 ? (*cur_node)->children->next == NULL : ((*cur_node)->children->next == NULL || (*cur_node)->children->next->next == NULL)); #endif kvi_push(ast_stack, &(*cur_node)->children); } else if ((*cur_node)->next != NULL) { kvi_push(ast_stack, &(*cur_node)->next); } else if (*cur_node != NULL) { kv_drop(ast_stack, 1); switch ((*cur_node)->type) { case kExprNodeDoubleQuotedString: case kExprNodeSingleQuotedString: xfree((*cur_node)->data.str.value); break; case kExprNodeMissing: case kExprNodeOpMissing: case kExprNodeTernary: case kExprNodeTernaryValue: case kExprNodeRegister: case kExprNodeSubscript: case kExprNodeListLiteral: case kExprNodeUnaryPlus: case kExprNodeBinaryPlus: case kExprNodeNested: case kExprNodeCall: case kExprNodePlainIdentifier: case kExprNodePlainKey: case kExprNodeComplexIdentifier: case kExprNodeUnknownFigure: case kExprNodeLambda: case kExprNodeDictLiteral: case kExprNodeCurlyBracesIdentifier: case kExprNodeAssignment: case kExprNodeComma: case kExprNodeColon: case kExprNodeArrow: case kExprNodeComparison: case kExprNodeConcat: case kExprNodeConcatOrSubscript: case kExprNodeInteger: case kExprNodeFloat: case kExprNodeOr: case kExprNodeAnd: case kExprNodeUnaryMinus: case kExprNodeBinaryMinus: case kExprNodeNot: case kExprNodeMultiplication: case kExprNodeDivision: case kExprNodeMod: case kExprNodeOption: case kExprNodeEnvironment: break; } xfree(*cur_node); *cur_node = NULL; } } kvi_destroy(ast_stack); } // Binary operator precedence and associativity: // // Operator | Precedence | Associativity // ---------+------------+----------------- // || | 2 | left // && | 3 | left // cmp* | 4 | not associative // + - . | 5 | left // * / % | 6 | left // // * comparison operators: // // == ==# ==? != !=# !=? // =~ =~# =~? !~ !~# !~? // > ># >? <= <=# <=? // < <# = >=# >=? // is is# is? isnot isnot# isnot? /// Allocate a new node and set some of the values /// /// @param[in] type Node type to allocate. /// @param[in] level Node level to allocate static inline ExprASTNode *viml_pexpr_new_node(const ExprASTNodeType type) FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_MALLOC { ExprASTNode *ret = xmalloc(sizeof(*ret)); ret->type = type; ret->children = NULL; ret->next = NULL; return ret; } static struct { ExprOpLvl lvl; ExprOpAssociativity ass; } node_type_to_node_props[] = { [kExprNodeMissing] = { kEOpLvlInvalid, kEOpAssNo, }, [kExprNodeOpMissing] = { kEOpLvlMultiplication, kEOpAssNo }, [kExprNodeNested] = { kEOpLvlParens, kEOpAssNo }, // Note: below nodes are kEOpLvlSubscript for “binary operator” itself, but // kEOpLvlParens when it comes to inside the parenthesis. [kExprNodeCall] = { kEOpLvlParens, kEOpAssNo }, [kExprNodeSubscript] = { kEOpLvlParens, kEOpAssNo }, [kExprNodeUnknownFigure] = { kEOpLvlParens, kEOpAssLeft }, [kExprNodeLambda] = { kEOpLvlParens, kEOpAssNo }, [kExprNodeDictLiteral] = { kEOpLvlParens, kEOpAssNo }, [kExprNodeListLiteral] = { kEOpLvlParens, kEOpAssNo }, [kExprNodeArrow] = { kEOpLvlArrow, kEOpAssNo }, // Right associativity for comma because this means easier access to arguments // list, etc: for "[a, b, c, d]" you can access "a" in one step if it is // represented as "list(comma(a, comma(b, comma(c, d))))" then if it is // "list(comma(comma(comma(a, b), c), d))" in which case you will need to // traverse all three comma() structures. And with comma operator (including // actual comma operator from C which is not present in VimL) nobody cares // about associativity, only about order of execution. [kExprNodeComma] = { kEOpLvlComma, kEOpAssRight }, // Colons are not eligible for chaining, so nobody cares about associativity. [kExprNodeColon] = { kEOpLvlColon, kEOpAssNo }, [kExprNodeTernary] = { kEOpLvlTernary, kEOpAssRight }, [kExprNodeOr] = { kEOpLvlOr, kEOpAssLeft }, [kExprNodeAnd] = { kEOpLvlAnd, kEOpAssLeft }, [kExprNodeTernaryValue] = { kEOpLvlTernaryValue, kEOpAssRight }, [kExprNodeComparison] = { kEOpLvlComparison, kEOpAssRight }, [kExprNodeBinaryPlus] = { kEOpLvlAddition, kEOpAssLeft }, [kExprNodeBinaryMinus] = { kEOpLvlAddition, kEOpAssLeft }, [kExprNodeConcat] = { kEOpLvlAddition, kEOpAssLeft }, [kExprNodeMultiplication] = { kEOpLvlMultiplication, kEOpAssLeft }, [kExprNodeDivision] = { kEOpLvlMultiplication, kEOpAssLeft }, [kExprNodeMod] = { kEOpLvlMultiplication, kEOpAssLeft }, [kExprNodeUnaryPlus] = { kEOpLvlUnary, kEOpAssNo }, [kExprNodeUnaryMinus] = { kEOpLvlUnary, kEOpAssNo }, [kExprNodeNot] = { kEOpLvlUnary, kEOpAssNo }, [kExprNodeConcatOrSubscript] = { kEOpLvlSubscript, kEOpAssLeft }, [kExprNodeCurlyBracesIdentifier] = { kEOpLvlComplexIdentifier, kEOpAssLeft }, [kExprNodeAssignment] = { kEOpLvlAssignment, kEOpAssLeft }, [kExprNodeComplexIdentifier] = { kEOpLvlValue, kEOpAssLeft }, [kExprNodePlainIdentifier] = { kEOpLvlValue, kEOpAssNo }, [kExprNodePlainKey] = { kEOpLvlValue, kEOpAssNo }, [kExprNodeRegister] = { kEOpLvlValue, kEOpAssNo }, [kExprNodeInteger] = { kEOpLvlValue, kEOpAssNo }, [kExprNodeFloat] = { kEOpLvlValue, kEOpAssNo }, [kExprNodeDoubleQuotedString] = { kEOpLvlValue, kEOpAssNo }, [kExprNodeSingleQuotedString] = { kEOpLvlValue, kEOpAssNo }, [kExprNodeOption] = { kEOpLvlValue, kEOpAssNo }, [kExprNodeEnvironment] = { kEOpLvlValue, kEOpAssNo }, }; /// Get AST node priority level /// /// Used primary to reduce line length, so keep the name short. /// /// @param[in] node Node to get priority for. /// /// @return Node priority level. static inline ExprOpLvl node_lvl(const ExprASTNode node) FUNC_ATTR_ALWAYS_INLINE FUNC_ATTR_CONST FUNC_ATTR_WARN_UNUSED_RESULT { return node_type_to_node_props[node.type].lvl; } /// Get AST node associativity, to be used for operator nodes primary /// /// Used primary to reduce line length, so keep the name short. /// /// @param[in] node Node to get priority for. /// /// @return Node associativity. static inline ExprOpAssociativity node_ass(const ExprASTNode node) FUNC_ATTR_ALWAYS_INLINE FUNC_ATTR_CONST FUNC_ATTR_WARN_UNUSED_RESULT { return node_type_to_node_props[node.type].ass; } /// Handle binary operator /// /// This function is responsible for handling priority levels as well. /// /// @param[in] pstate Parser state, used for error reporting. /// @param ast_stack AST stack. May be popped of some values and will /// definitely receive new ones. /// @param bop_node New node to handle. /// @param[out] want_node_p New value of want_node. /// @param[out] ast_err Location where error is saved, if any. /// /// @return True if no errors occurred, false otherwise. static bool viml_pexpr_handle_bop(const ParserState *const pstate, ExprASTStack *const ast_stack, ExprASTNode *const bop_node, ExprASTWantedNode *const want_node_p, ExprASTError *const ast_err) FUNC_ATTR_NONNULL_ALL { bool ret = true; ExprASTNode **top_node_p = NULL; ExprASTNode *top_node; ExprOpLvl top_node_lvl; ExprOpAssociativity top_node_ass; assert(kv_size(*ast_stack)); const ExprOpLvl bop_node_lvl = ((bop_node->type == kExprNodeCall || bop_node->type == kExprNodeSubscript) ? kEOpLvlSubscript : node_lvl(*bop_node)); #ifndef NDEBUG const ExprOpAssociativity bop_node_ass = ( (bop_node->type == kExprNodeCall || bop_node->type == kExprNodeSubscript) ? kEOpAssLeft : node_ass(*bop_node)); #endif do { ExprASTNode **new_top_node_p = kv_last(*ast_stack); ExprASTNode *new_top_node = *new_top_node_p; assert(new_top_node != NULL); const ExprOpLvl new_top_node_lvl = node_lvl(*new_top_node); const ExprOpAssociativity new_top_node_ass = node_ass(*new_top_node); assert(bop_node_lvl != new_top_node_lvl || bop_node_ass == new_top_node_ass); if (top_node_p != NULL && ((bop_node_lvl > new_top_node_lvl || (bop_node_lvl == new_top_node_lvl && new_top_node_ass == kEOpAssNo)))) { break; } kv_drop(*ast_stack, 1); top_node_p = new_top_node_p; top_node = new_top_node; top_node_lvl = new_top_node_lvl; top_node_ass = new_top_node_ass; if (bop_node_lvl == top_node_lvl && top_node_ass == kEOpAssRight) { break; } } while (kv_size(*ast_stack)); if (top_node_ass == kEOpAssLeft || top_node_lvl != bop_node_lvl) { // outer(op(x,y)) -> outer(new_op(op(x,y),*)) // // Before: top_node_p = outer(*), points to op(x,y) // Other stack elements unknown // // After: top_node_p = outer(*), points to new_op(op(x,y)) // &bop_node->children->next = new_op(op(x,y),*), points to NULL *top_node_p = bop_node; bop_node->children = top_node; assert(bop_node->children->next == NULL); kvi_push(*ast_stack, top_node_p); kvi_push(*ast_stack, &bop_node->children->next); } else { assert(top_node_lvl == bop_node_lvl && top_node_ass == kEOpAssRight); assert(top_node->children != NULL && top_node->children->next != NULL); // outer(op(x,y)) -> outer(op(x,new_op(y,*))) // // Before: top_node_p = outer(*), points to op(x,y) // Other stack elements unknown // // After: top_node_p = outer(*), points to op(x,new_op(y)) // &top_node->children->next = op(x,*), points to new_op(y) // &bop_node->children->next = new_op(y,*), points to NULL bop_node->children = top_node->children->next; top_node->children->next = bop_node; assert(bop_node->children->next == NULL); kvi_push(*ast_stack, top_node_p); kvi_push(*ast_stack, &top_node->children->next); kvi_push(*ast_stack, &bop_node->children->next); // TODO(ZyX-I): Make this not error, but treat like Python does if (bop_node->type == kExprNodeComparison) { east_set_error(pstate, ast_err, _("E15: Operator is not associative: %.*s"), bop_node->start); ret = false; } } *want_node_p = kENodeValue; return ret; } /// ParserPosition literal based on ParserPosition pos with columns shifted /// /// Function does not check whether resulting position is valid. /// /// @param[in] pos Position to shift. /// @param[in] shift Number of bytes to shift. /// /// @return Shifted position. static inline ParserPosition shifted_pos(const ParserPosition pos, const size_t shift) FUNC_ATTR_CONST FUNC_ATTR_ALWAYS_INLINE FUNC_ATTR_WARN_UNUSED_RESULT { return (ParserPosition) { .line = pos.line, .col = pos.col + shift }; } /// ParserPosition literal based on ParserPosition pos with specified column /// /// Function does not check whether remaining position is valid. /// /// @param[in] pos Position to adjust. /// @param[in] new_col New column. /// /// @return Shifted position. static inline ParserPosition recol_pos(const ParserPosition pos, const size_t new_col) FUNC_ATTR_CONST FUNC_ATTR_ALWAYS_INLINE FUNC_ATTR_WARN_UNUSED_RESULT { return (ParserPosition) { .line = pos.line, .col = new_col }; } /// Get highlight group name #define HL(g) (is_invalid ? "NvimInvalid" #g : "Nvim" #g) /// Highlight current token with the given group #define HL_CUR_TOKEN(g) \ viml_parser_highlight(pstate, cur_token.start, cur_token.len, \ HL(g)) /// Allocate new node, saving some values #define NEW_NODE(type) \ viml_pexpr_new_node(type) /// Set position of the given node to position from the given token /// /// @param cur_node Node to modify. /// @param cur_token Token to set position from. #define POS_FROM_TOKEN(cur_node, cur_token) \ do { \ (cur_node)->start = cur_token.start; \ (cur_node)->len = cur_token.len; \ } while (0) /// Allocate new node and set its position from the current token /// /// If previous token happened to contain spacing then it will be included. /// /// @param cur_node Variable to save allocated node to. /// @param typ Node type. #define NEW_NODE_WITH_CUR_POS(cur_node, typ) \ do { \ (cur_node) = NEW_NODE(typ); \ POS_FROM_TOKEN((cur_node), cur_token); \ if (prev_token.type == kExprLexSpacing) { \ (cur_node)->start = prev_token.start; \ (cur_node)->len += prev_token.len; \ } \ } while (0) /// Check whether it is possible to have next expression after current /// /// For :echo: `:echo @a @a` is a valid expression. `:echo (@a @a)` is not. #define MAY_HAVE_NEXT_EXPR \ (kv_size(ast_stack) == 1) /// Add operator node /// /// @param[in] cur_node Node to add. #define ADD_OP_NODE(cur_node) \ is_invalid |= !viml_pexpr_handle_bop(pstate, &ast_stack, cur_node, \ &want_node, &ast.err) /// Record missing operator: for things like /// /// :echo @a @a /// /// (allowed) or /// /// :echo (@a @a) /// /// (parsed as OpMissing(@a, @a)). #define OP_MISSING \ do { \ if (flags & kExprFlagsMulti && MAY_HAVE_NEXT_EXPR) { \ /* Multiple expressions allowed, return without calling */ \ /* viml_parser_advance(). */ \ goto viml_pexpr_parse_end; \ } else { \ assert(*top_node_p != NULL); \ ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E15: Missing operator: %.*s")); \ NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeOpMissing); \ cur_node->len = 0; \ ADD_OP_NODE(cur_node); \ goto viml_pexpr_parse_process_token; \ } \ } while (0) /// Record missing value: for things like "* 5" /// /// @param[in] msg Error message. #define ADD_VALUE_IF_MISSING(msg) \ do { \ if (want_node == kENodeValue) { \ ERROR_FROM_TOKEN_AND_MSG(cur_token, (msg)); \ NEW_NODE_WITH_CUR_POS((*top_node_p), kExprNodeMissing); \ (*top_node_p)->len = 0; \ want_node = kENodeOperator; \ } \ } while (0) /// Set AST error, unless AST already is not correct /// /// @param[out] ret_ast AST to set error in. /// @param[in] pstate Parser state, used to get error message argument. /// @param[in] msg Error message, assumed to be already translated and /// containing a single %token "%.*s". /// @param[in] start Position at which error occurred. static inline void east_set_error(const ParserState *const pstate, ExprASTError *const ret_ast_err, const char *const msg, const ParserPosition start) FUNC_ATTR_NONNULL_ALL FUNC_ATTR_ALWAYS_INLINE { if (ret_ast_err->msg != NULL) { return; } const ParserLine pline = pstate->reader.lines.items[start.line]; ret_ast_err->msg = msg; ret_ast_err->arg_len = (int)(pline.size - start.col); ret_ast_err->arg = pline.data ? pline.data + start.col : NULL; } /// Set error from the given token and given message #define ERROR_FROM_TOKEN_AND_MSG(cur_token, msg) \ do { \ is_invalid = true; \ east_set_error(pstate, &ast.err, msg, cur_token.start); \ } while (0) /// Like #ERROR_FROM_TOKEN_AND_MSG, but gets position from a node #define ERROR_FROM_NODE_AND_MSG(node, msg) \ do { \ is_invalid = true; \ east_set_error(pstate, &ast.err, msg, node->start); \ } while (0) /// Set error from the given kExprLexInvalid token #define ERROR_FROM_TOKEN(cur_token) \ ERROR_FROM_TOKEN_AND_MSG(cur_token, cur_token.data.err.msg) /// Select figure brace type, altering highlighting as well if needed /// /// @param[out] node Node to modify type. /// @param[in] new_type New type, one of ExprASTNodeType values without /// kExprNode prefix. /// @param[in] hl Corresponding highlighting, passed as an argument to #HL. #define SELECT_FIGURE_BRACE_TYPE(node, new_type, hl) \ do { \ ExprASTNode *const node_ = (node); \ assert(node_->type == kExprNodeUnknownFigure \ || node_->type == kExprNode##new_type); \ node_->type = kExprNode##new_type; \ if (pstate->colors) { \ kv_A(*pstate->colors, node_->data.fig.opening_hl_idx).group = \ HL(hl); \ } \ } while (0) /// Add identifier which should constitute complex identifier node /// /// This one is to be called only in case want_node is kENodeOperator. /// /// @param new_ident_node_code Code used to create a new identifier node and /// update want_node and ast_stack, without /// a trailing semicolon. /// @param hl Highlighting name to use, passed as an argument to #HL. #define ADD_IDENT(new_ident_node_code, hl) \ do { \ assert(want_node == kENodeOperator); \ /* Operator: may only be curly braces name, but only under certain */ \ /* conditions. */ \ /* First condition is that there is no space before a part of complex */ \ /* identifier. */ \ if (prev_token.type == kExprLexSpacing) { \ OP_MISSING; \ } \ switch ((*top_node_p)->type) { \ /* Second is that previous node is one of the identifiers: */ \ /* complex, plain, curly braces. */ \ /* TODO(ZyX-I): Extend syntax to allow ${expr}. This is needed to */ \ /* handle environment variables like those bash uses for */ \ /* `export -f`: their names consist not only of alphanumeric */ \ /* characters. */ \ case kExprNodeComplexIdentifier: \ case kExprNodePlainIdentifier: \ case kExprNodeCurlyBracesIdentifier: { \ NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeComplexIdentifier); \ cur_node->len = 0; \ cur_node->children = *top_node_p; \ *top_node_p = cur_node; \ kvi_push(ast_stack, &cur_node->children->next); \ ExprASTNode **const new_top_node_p = kv_last(ast_stack); \ assert(*new_top_node_p == NULL); \ new_ident_node_code; \ *new_top_node_p = cur_node; \ HL_CUR_TOKEN(hl); \ break; \ } \ default: { \ OP_MISSING; \ break; \ } \ } \ } while (0) /// Determine whether given parse type is an assignment /// /// @param[in] pt Checked parse type. /// /// @return true if parsing an assignment, false otherwise. static inline bool pt_is_assignment(const ExprASTParseType pt) FUNC_ATTR_ALWAYS_INLINE FUNC_ATTR_CONST FUNC_ATTR_WARN_UNUSED_RESULT { return (pt == kEPTAssignment || pt == kEPTSingleAssignment); } /// Structure used to define “string shifts” necessary to map string /// highlighting to actual strings. typedef struct { size_t start; ///< Where special character starts in original string. size_t orig_len; ///< Length of orininal string (e.g. 4 for "\x80"). size_t act_len; ///< Length of resulting character(s) (e.g. 1 for "\x80"). bool escape_not_known; ///< True if escape sequence in original is not known. } StringShift; /// Parse and highlight single- or double-quoted string /// /// Function is supposed to detect and highlight regular expressions (but does /// not do now). /// /// @param[out] pstate Parser state which also contains a place where /// highlighting is saved. /// @param[out] node Node where string parsing results are saved. /// @param[in] token Token to highlight. /// @param[in] ast_stack Parser AST stack, used to detect whether current /// string is a regex. /// @param[in] is_invalid Whether currently processed token is not valid. static void parse_quoted_string(ParserState *const pstate, ExprASTNode *const node, const LexExprToken token, const ExprASTStack *ast_stack, const bool is_invalid) FUNC_ATTR_NONNULL_ALL { const ParserLine pline = pstate->reader.lines.items[token.start.line]; const char *const s = pline.data + token.start.col; const char *const e = s + token.len - token.data.str.closed; const char *p = s + 1; const bool is_double = (token.type == kExprLexDoubleQuotedString); size_t size = token.len - token.data.str.closed - 1; kvec_withinit_t(StringShift, 16) shifts; kvi_init(shifts); if (!is_double) { viml_parser_highlight(pstate, token.start, 1, HL(SingleQuote)); while (p < e) { const char *const chunk_e = memchr(p, '\'', (size_t)(e - p)); if (chunk_e == NULL) { break; } size--; p = chunk_e + 2; if (pstate->colors) { kvi_push(shifts, ((StringShift) { .start = token.start.col + (size_t)(chunk_e - s), .orig_len = 2, .act_len = 1, .escape_not_known = false, })); } } node->data.str.size = size; if (size == 0) { node->data.str.value = NULL; } else { char *v_p; v_p = node->data.str.value = xmallocz(size); p = s + 1; while (p < e) { const char *const chunk_e = memchr(p, '\'', (size_t)(e - p)); if (chunk_e == NULL) { memcpy(v_p, p, (size_t)(e - p)); break; } memcpy(v_p, p, (size_t)(chunk_e - p)); v_p += (size_t)(chunk_e - p) + 1; v_p[-1] = '\''; p = chunk_e + 2; } } } else { viml_parser_highlight(pstate, token.start, 1, HL(DoubleQuote)); for (p = s + 1; p < e; p++) { if (*p == '\\' && p + 1 < e) { p++; if (p + 1 == e) { size--; break; } switch (*p) { // A "\" form occupies at least 4 characters, and produces up to // to 9 characters (6 for the char and 3 for a modifier): // reserve space for 5 extra, but do not compute actual length // just now, it would be costly. case '<': size += 5; break; // Hexadecimal, always single byte, but at least three bytes each. case 'x': case 'X': size--; if (ascii_isxdigit(p[1])) { size--; if (p + 2 < e && ascii_isxdigit(p[2])) { size--; } } break; // Unicode // // \uF takes 1 byte which is 2 bytes less then escape sequence. // \uFF: 2 bytes, 2 bytes less. // \uFFF: 3 bytes, 2 bytes less. // \uFFFF: 3 bytes, 3 bytes less. // \UFFFFF: 4 bytes, 3 bytes less. // \UFFFFFF: 5 bytes, 3 bytes less. // \UFFFFFFF: 6 bytes, 3 bytes less. // \U7FFFFFFF: 6 bytes, 4 bytes less. case 'u': case 'U': { const char *const esc_start = p; size_t n = (*p == 'u' ? 4 : 8); int nr = 0; p++; while (p + 1 < e && n-- && ascii_isxdigit(p[1])) { p++; nr = (nr << 4) + hex2nr(*p); } // Escape length: (esc_start - 1) points to "\\", esc_start to "u" // or "U", p to the byte after last byte. So escape sequence // occupies p - (esc_start - 1), but it stands for a utf_char2len // bytes. size -= (size_t)((p - (esc_start - 1)) - utf_char2len(nr)); p--; break; } // Octal, always single byte, but at least two bytes each. case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': size--; p++; if (*p >= '0' && *p <= '7') { size--; p++; if (p < e && *p >= '0' && *p <= '7') { size--; p++; } } break; default: size--; break; } } } if (size == 0) { node->data.str.value = NULL; node->data.str.size = 0; } else { char *v_p; v_p = node->data.str.value = xmalloc(size); p = s + 1; while (p < e) { const char *const chunk_e = memchr(p, '\\', (size_t)(e - p)); if (chunk_e == NULL) { memcpy(v_p, p, (size_t)(e - p)); v_p += e - p; break; } memcpy(v_p, p, (size_t)(chunk_e - p)); v_p += (size_t)(chunk_e - p); p = chunk_e + 1; if (p == e) { *v_p++ = '\\'; break; } bool is_unknown = false; const char *const v_p_start = v_p; switch (*p) { #define SINGLE_CHAR_ESC(ch, real_ch) \ case ch: { \ *v_p++ = real_ch; \ p++; \ break; \ } SINGLE_CHAR_ESC('b', BS) SINGLE_CHAR_ESC('e', ESC) SINGLE_CHAR_ESC('f', FF) SINGLE_CHAR_ESC('n', NL) SINGLE_CHAR_ESC('r', CAR) SINGLE_CHAR_ESC('t', TAB) SINGLE_CHAR_ESC('"', '"') SINGLE_CHAR_ESC('\\', '\\') #undef SINGLE_CHAR_ESC // Hexadecimal or unicode. case 'X': case 'x': case 'u': case 'U': if (p + 1 < e && ascii_isxdigit(p[1])) { size_t n; int nr; bool is_hex = (*p == 'x' || *p == 'X'); if (is_hex) { n = 2; } else if (*p == 'u') { n = 4; } else { n = 8; } nr = 0; while (p + 1 < e && n-- && ascii_isxdigit(p[1])) { p++; nr = (nr << 4) + hex2nr(*p); } p++; if (is_hex) { *v_p++ = (char)nr; } else { v_p += utf_char2bytes(nr, v_p); } } else { is_unknown = true; *v_p++ = *p; p++; } break; // Octal: "\1", "\12", "\123". case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': { uint8_t ch = (uint8_t)(*p++ - '0'); if (p < e && *p >= '0' && *p <= '7') { ch = (uint8_t)((ch << 3) + *p++ - '0'); if (p < e && *p >= '0' && *p <= '7') { ch = (uint8_t)((ch << 3) + *p++ - '0'); } } *v_p++ = (char)ch; break; } // Special key, e.g.: "\" case '<': { int flags = FSK_KEYCODE | FSK_IN_STRING; if (p[1] != '*') { flags |= FSK_SIMPLIFY; } const size_t special_len = trans_special((const char_u **)&p, (size_t)(e - p), (char_u *)v_p, flags, false, NULL); if (special_len != 0) { v_p += special_len; } else { is_unknown = true; mb_copy_char(&p, &v_p); } break; } default: is_unknown = true; mb_copy_char(&p, &v_p); break; } if (pstate->colors) { kvi_push(shifts, ((StringShift) { .start = token.start.col + (size_t)(chunk_e - s), .orig_len = (size_t)(p - chunk_e), .act_len = (size_t)(v_p - (char *)v_p_start), .escape_not_known = is_unknown, })); } } node->data.str.size = (size_t)(v_p - node->data.str.value); } } if (pstate->colors) { // TODO(ZyX-I): use ast_stack to determine and highlight regular expressions // TODO(ZyX-I): use ast_stack to determine and highlight printf format str // TODO(ZyX-I): use ast_stack to determine and highlight expression strings size_t next_col = token.start.col + 1; const char *const body_str = (is_double ? HL(DoubleQuotedBody) : HL(SingleQuotedBody)); const char *const esc_str = (is_double ? HL(DoubleQuotedEscape) : HL(SingleQuotedQuote)); const char *const ukn_esc_str = (is_double ? HL(DoubleQuotedUnknownEscape) : HL(SingleQuotedUnknownEscape)); for (size_t i = 0; i < kv_size(shifts); i++) { const StringShift cur_shift = kv_A(shifts, i); if (cur_shift.start > next_col) { viml_parser_highlight(pstate, recol_pos(token.start, next_col), cur_shift.start - next_col, body_str); } viml_parser_highlight(pstate, recol_pos(token.start, cur_shift.start), cur_shift.orig_len, (cur_shift.escape_not_known ? ukn_esc_str : esc_str)); next_col = cur_shift.start + cur_shift.orig_len; } if (next_col - token.start.col < token.len - token.data.str.closed) { viml_parser_highlight(pstate, recol_pos(token.start, next_col), (token.start.col + token.len - token.data.str.closed - next_col), body_str); } } if (token.data.str.closed) { if (is_double) { viml_parser_highlight(pstate, shifted_pos(token.start, token.len - 1), 1, HL(DoubleQuote)); } else { viml_parser_highlight(pstate, shifted_pos(token.start, token.len - 1), 1, HL(SingleQuote)); } } kvi_destroy(shifts); } /// Additional flags to pass to lexer depending on want_node static const int want_node_to_lexer_flags[] = { [kENodeValue] = kELFlagIsNotCmp, [kENodeOperator] = kELFlagForbidScope, }; /// Number of characters to highlight as NumberPrefix depending on the base static const uint8_t base_to_prefix_length[] = { [2] = 2, [8] = 1, [10] = 0, [16] = 2, }; /// Parse one VimL expression /// /// @param pstate Parser state. /// @param[in] flags Additional flags, see ExprParserFlags /// /// @return Parsed AST. ExprAST viml_pexpr_parse(ParserState *const pstate, const int flags) FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL { ExprAST ast = { .err = { .msg = NULL, .arg_len = 0, .arg = NULL, }, .root = NULL, }; // Expression stack contains current branch in AST tree: that is // - Stack item 0 contains root of the tree, i.e. &ast->root. // - Stack item i points to the previous stack items’ last child. // // When parser expects “value” node that is something like identifier or "[" // (list start) last stack item contains NULL. Otherwise last stack item is // supposed to contain last “finished” value: e.g. "1" or "+(1, 1)" (node // representing "1+1"). ExprASTStack ast_stack; kvi_init(ast_stack); kvi_push(ast_stack, &ast.root); ExprASTWantedNode want_node = kENodeValue; ExprASTParseTypeStack pt_stack; kvi_init(pt_stack); kvi_push(pt_stack, kEPTExpr); if (flags & kExprFlagsParseLet) { kvi_push(pt_stack, kEPTAssignment); } LexExprToken prev_token = { .type = kExprLexMissing }; bool highlighted_prev_spacing = false; // Lambda node, valid when parsing lambda arguments only. ExprASTNode *lambda_node = NULL; size_t asgn_level = 0; do { const bool is_concat_or_subscript = ( want_node == kENodeValue && kv_size(ast_stack) > 1 && (*kv_Z(ast_stack, 1))->type == kExprNodeConcatOrSubscript); const int lexer_additional_flags = ( kELFlagPeek | ((flags & kExprFlagsDisallowEOC) ? kELFlagForbidEOC : 0) | ((want_node == kENodeValue && (kv_size(ast_stack) == 1 || ((*kv_Z(ast_stack, 1))->type != kExprNodeConcat && ((*kv_Z(ast_stack, 1))->type != kExprNodeConcatOrSubscript)))) ? kELFlagAllowFloat : 0)); LexExprToken cur_token = viml_pexpr_next_token(pstate, want_node_to_lexer_flags[want_node] | lexer_additional_flags); if (cur_token.type == kExprLexEOC) { break; } LexExprTokenType tok_type = cur_token.type; const bool token_invalid = (tok_type == kExprLexInvalid); bool is_invalid = token_invalid; viml_pexpr_parse_process_token: // May use different flags this time. cur_token = viml_pexpr_next_token(pstate, want_node_to_lexer_flags[want_node] | lexer_additional_flags); if (tok_type == kExprLexSpacing) { if (is_invalid) { HL_CUR_TOKEN(Spacing); } else { // Do not do anything: let regular spacing be highlighted as normal. // This also allows later to highlight spacing as invalid. } goto viml_pexpr_parse_cycle_end; } else if (is_invalid && prev_token.type == kExprLexSpacing && !highlighted_prev_spacing) { viml_parser_highlight(pstate, prev_token.start, prev_token.len, HL(Spacing)); is_invalid = false; highlighted_prev_spacing = true; } const ParserLine pline = pstate->reader.lines.items[cur_token.start.line]; ExprASTNode **const top_node_p = kv_last(ast_stack); assert(kv_size(ast_stack) >= 1); ExprASTNode *cur_node = NULL; #ifndef NDEBUG const bool want_value = (want_node == kENodeValue); assert(want_value == (*top_node_p == NULL)); assert(kv_A(ast_stack, 0) == &ast.root); // Check that stack item i + 1 points to stack items’ i *last* child. for (size_t i = 0; i + 1 < kv_size(ast_stack); i++) { const bool item_null = (want_value && i + 2 == kv_size(ast_stack)); assert((&(*kv_A(ast_stack, i))->children == kv_A(ast_stack, i + 1) && (item_null ? (*kv_A(ast_stack, i))->children == NULL : (*kv_A(ast_stack, i))->children->next == NULL)) || ((&(*kv_A(ast_stack, i))->children->next == kv_A(ast_stack, i + 1)) && (item_null ? (*kv_A(ast_stack, i))->children->next == NULL : (*kv_A(ast_stack, i))->children->next->next == NULL))); } #endif // Note: in Vim whether expression "cond?d.a:2" is valid depends both on // "cond" and whether "d" is a dictionary: expression is valid if condition // is true and "d" is a dictionary (with "a" key or it will complain about // missing one, but this is not relevant); if any of the requirements is // broken then this thing is parsed as "d . a:2" yielding missing colon // error. This parser does not allow such ambiguity, especially because it // simply can’t: whether "d" is a dictionary is not known at the parsing // time. // // Here example will always contain a concat with "a:2" sucking colon, // making expression invalid both because there is no longer a spare colon // for ternary and because concatenating dictionary with anything is not // valid. There are more cases when this will make a difference though. const bool node_is_key = ( is_concat_or_subscript && (cur_token.type == kExprLexPlainIdentifier ? (!cur_token.data.var.autoload && cur_token.data.var.scope == kExprVarScopeMissing) : (cur_token.type == kExprLexNumber)) && prev_token.type != kExprLexSpacing); if (is_concat_or_subscript && !node_is_key) { // Note: in Vim "d. a" (this is the reason behind `prev_token.type != // kExprLexSpacing` part of the condition) as well as any other "d.{expr}" // where "{expr}" does not look like a key is invalid whenever "d" happens // to be a dictionary. Since parser has no idea whether preceding // expression is actually a dictionary it can’t outright reject anything, // so it turns kExprNodeConcatOrSubscript into kExprNodeConcat instead, // which will yield different errors then Vim does in a number of // circumstances, and in any case runtime and not parse time errors. (*kv_Z(ast_stack, 1))->type = kExprNodeConcat; } // Pop some stack pt_stack items in case of misplaced nodes. const bool is_single_assignment = kv_last(pt_stack) == kEPTSingleAssignment; switch (kv_last(pt_stack)) { case kEPTExpr: break; case kEPTLambdaArguments: if ((want_node == kENodeOperator && tok_type != kExprLexComma && tok_type != kExprLexArrow) || (want_node == kENodeValue && !(cur_token.type == kExprLexPlainIdentifier && cur_token.data.var.scope == kExprVarScopeMissing && !cur_token.data.var.autoload) && tok_type != kExprLexArrow)) { lambda_node->data.fig.type_guesses.allow_lambda = false; if (lambda_node->children != NULL && lambda_node->children->type == kExprNodeComma) { // If lambda has comma child this means that parser has already seen // at least "{arg1,", so node cannot possibly be anything, but // lambda. // Vim may give E121 or E720 in this case, but it does not look // right to have either because both are results of reevaluation // possibly-lambda node as a dictionary and here this is not going // to happen. ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E15: Expected lambda arguments list or arrow: %.*s")); } else { // Else it may appear that possibly-lambda node is actually // a dictionary or curly-braces-name identifier. lambda_node = NULL; kv_drop(pt_stack, 1); } } break; case kEPTSingleAssignment: case kEPTAssignment: if (want_node == kENodeValue && tok_type != kExprLexBracket && tok_type != kExprLexPlainIdentifier && (tok_type != kExprLexFigureBrace || cur_token.data.brc.closing) && !(node_is_key && tok_type == kExprLexNumber) && tok_type != kExprLexEnv && tok_type != kExprLexOption && tok_type != kExprLexRegister) { ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E15: Expected value part of assignment lvalue: %.*s")); kv_drop(pt_stack, 1); } else if (want_node == kENodeOperator && tok_type != kExprLexBracket && (tok_type != kExprLexFigureBrace || cur_token.data.brc.closing) && tok_type != kExprLexDot && (tok_type != kExprLexComma || !is_single_assignment) && tok_type != kExprLexAssignment // Curly brace identifiers: will contain plain identifier or // another curly brace in position where operator is wanted. && !((tok_type == kExprLexPlainIdentifier || (tok_type == kExprLexFigureBrace && !cur_token.data.brc.closing)) && prev_token.type != kExprLexSpacing)) { if (flags & kExprFlagsMulti && MAY_HAVE_NEXT_EXPR) { goto viml_pexpr_parse_end; } ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E15: Expected assignment operator or subscript: %.*s")); kv_drop(pt_stack, 1); } assert(kv_size(pt_stack)); break; } assert(kv_size(pt_stack)); const ExprASTParseType cur_pt = kv_last(pt_stack); assert(lambda_node == NULL || cur_pt == kEPTLambdaArguments); switch (tok_type) { case kExprLexMissing: case kExprLexSpacing: case kExprLexEOC: abort(); case kExprLexInvalid: ERROR_FROM_TOKEN(cur_token); tok_type = cur_token.data.err.type; goto viml_pexpr_parse_process_token; case kExprLexRegister: { if (want_node == kENodeOperator) { // Register in operator position: e.g. @a @a OP_MISSING; } NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeRegister); cur_node->data.reg.name = cur_token.data.reg.name; *top_node_p = cur_node; want_node = kENodeOperator; HL_CUR_TOKEN(Register); break; } #define SIMPLE_UB_OP(op) \ case kExprLex##op: { \ if (want_node == kENodeValue) { \ /* Value level: assume unary operator. */ \ NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeUnary##op); \ *top_node_p = cur_node; \ kvi_push(ast_stack, &cur_node->children); \ HL_CUR_TOKEN(Unary##op); \ } else { \ NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeBinary##op); \ ADD_OP_NODE(cur_node); \ HL_CUR_TOKEN(Binary##op); \ } \ want_node = kENodeValue; \ break; \ } SIMPLE_UB_OP(Plus) SIMPLE_UB_OP(Minus) #undef SIMPLE_UB_OP #define SIMPLE_B_OP(op, msg) \ case kExprLex##op: { \ ADD_VALUE_IF_MISSING(_("E15: Unexpected " msg ": %.*s")); \ NEW_NODE_WITH_CUR_POS(cur_node, kExprNode##op); \ HL_CUR_TOKEN(op); \ ADD_OP_NODE(cur_node); \ break; \ } SIMPLE_B_OP(Or, "or operator") SIMPLE_B_OP(And, "and operator") #undef SIMPLE_B_OP case kExprLexMultiplication: ADD_VALUE_IF_MISSING(_("E15: Unexpected multiplication-like operator: %.*s")); switch (cur_token.data.mul.type) { #define MUL_OP(lex_op_tail, node_op_tail) \ case kExprLexMul##lex_op_tail: { \ NEW_NODE_WITH_CUR_POS(cur_node, kExprNode##node_op_tail); \ HL_CUR_TOKEN(node_op_tail); \ break; \ } MUL_OP(Mul, Multiplication) MUL_OP(Div, Division) MUL_OP(Mod, Mod) #undef MUL_OP } ADD_OP_NODE(cur_node); break; case kExprLexOption: { if (want_node == kENodeOperator) { OP_MISSING; } NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeOption); if (cur_token.type == kExprLexInvalid) { assert(cur_token.len == 1 || (cur_token.len == 3 && pline.data[cur_token.start.col + 2] == ':')); cur_node->data.opt.ident = ( pline.data + cur_token.start.col + cur_token.len); cur_node->data.opt.ident_len = 0; cur_node->data.opt.scope = ( cur_token.len == 3 ? (ExprOptScope)pline.data[cur_token.start.col + 1] : kExprOptScopeUnspecified); } else { cur_node->data.opt.ident = cur_token.data.opt.name; cur_node->data.opt.ident_len = cur_token.data.opt.len; cur_node->data.opt.scope = cur_token.data.opt.scope; } *top_node_p = cur_node; want_node = kENodeOperator; viml_parser_highlight(pstate, cur_token.start, 1, HL(OptionSigil)); const size_t scope_shift = ( cur_token.data.opt.scope == kExprOptScopeUnspecified ? 0 : 2); if (scope_shift) { viml_parser_highlight(pstate, shifted_pos(cur_token.start, 1), 1, HL(OptionScope)); viml_parser_highlight(pstate, shifted_pos(cur_token.start, 2), 1, HL(OptionScopeDelimiter)); } viml_parser_highlight(pstate, shifted_pos(cur_token.start, scope_shift + 1), cur_token.len - (scope_shift + 1), HL(OptionName)); break; } case kExprLexEnv: if (want_node == kENodeOperator) { OP_MISSING; } NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeEnvironment); cur_node->data.env.ident = pline.data + cur_token.start.col + 1; cur_node->data.env.ident_len = cur_token.len - 1; if (cur_node->data.env.ident_len == 0) { ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E15: Environment variable name missing")); } *top_node_p = cur_node; want_node = kENodeOperator; viml_parser_highlight(pstate, cur_token.start, 1, HL(EnvironmentSigil)); viml_parser_highlight(pstate, shifted_pos(cur_token.start, 1), cur_token.len - 1, HL(EnvironmentName)); break; case kExprLexNot: if (want_node == kENodeOperator) { OP_MISSING; } NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeNot); *top_node_p = cur_node; kvi_push(ast_stack, &cur_node->children); HL_CUR_TOKEN(Not); break; case kExprLexComparison: ADD_VALUE_IF_MISSING(_("E15: Expected value, got comparison operator: %.*s")); NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeComparison); if (cur_token.type == kExprLexInvalid) { cur_node->data.cmp.ccs = kCCStrategyUseOption; cur_node->data.cmp.type = kExprCmpEqual; cur_node->data.cmp.inv = false; } else { cur_node->data.cmp.ccs = cur_token.data.cmp.ccs; cur_node->data.cmp.type = cur_token.data.cmp.type; cur_node->data.cmp.inv = cur_token.data.cmp.inv; } ADD_OP_NODE(cur_node); if (cur_token.data.cmp.ccs != kCCStrategyUseOption) { viml_parser_highlight(pstate, cur_token.start, cur_token.len - 1, HL(Comparison)); viml_parser_highlight(pstate, shifted_pos(cur_token.start, cur_token.len - 1), 1, HL(ComparisonModifier)); } else { HL_CUR_TOKEN(Comparison); } want_node = kENodeValue; break; case kExprLexComma: assert(!(want_node == kENodeValue && cur_pt == kEPTLambdaArguments)); if (want_node == kENodeValue) { // Value level: comma appearing here is not valid. // Note: in Vim string(,x) will give E116, this is not the case here. ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E15: Expected value, got comma: %.*s")); NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeMissing); cur_node->len = 0; *top_node_p = cur_node; want_node = kENodeOperator; } if (cur_pt == kEPTLambdaArguments) { assert(lambda_node != NULL); assert(lambda_node->data.fig.type_guesses.allow_lambda); SELECT_FIGURE_BRACE_TYPE(lambda_node, Lambda, Lambda); } if (kv_size(ast_stack) < 2) { goto viml_pexpr_parse_invalid_comma; } for (size_t i = 1; i < kv_size(ast_stack); i++) { ExprASTNode *const *const eastnode_p = (ExprASTNode *const *)kv_Z(ast_stack, i); const ExprASTNodeType eastnode_type = (*eastnode_p)->type; const ExprOpLvl eastnode_lvl = node_lvl(**eastnode_p); if (eastnode_type == kExprNodeLambda) { assert(cur_pt == kEPTLambdaArguments && want_node == kENodeOperator); break; } else if (eastnode_type == kExprNodeDictLiteral || eastnode_type == kExprNodeListLiteral || eastnode_type == kExprNodeCall) { break; } else if (eastnode_type == kExprNodeComma || eastnode_type == kExprNodeColon || eastnode_lvl > kEOpLvlComma) { // Do nothing } else { viml_pexpr_parse_invalid_comma: ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E15: Comma outside of call, lambda or literal: %.*s")); break; } if (i == kv_size(ast_stack) - 1) { goto viml_pexpr_parse_invalid_comma; } } NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeComma); ADD_OP_NODE(cur_node); HL_CUR_TOKEN(Comma); break; #define EXP_VAL_COLON "E15: Expected value, got colon: %.*s" case kExprLexColon: { bool is_ternary = false; if (kv_size(ast_stack) < 2) { goto viml_pexpr_parse_invalid_colon; } bool can_be_ternary = true; bool is_subscript = false; for (size_t i = 1; i < kv_size(ast_stack); i++) { ExprASTNode *const *const eastnode_p = (ExprASTNode *const *)kv_Z(ast_stack, i); const ExprASTNodeType eastnode_type = (*eastnode_p)->type; const ExprOpLvl eastnode_lvl = node_lvl(**eastnode_p); STATIC_ASSERT(kEOpLvlTernary > kEOpLvlComma, "Unexpected operator priorities"); if (can_be_ternary && eastnode_type == kExprNodeTernaryValue && !(*eastnode_p)->data.ter.got_colon) { kv_drop(ast_stack, i); (*eastnode_p)->start = cur_token.start; (*eastnode_p)->len = cur_token.len; if (prev_token.type == kExprLexSpacing) { (*eastnode_p)->start = prev_token.start; (*eastnode_p)->len += prev_token.len; } is_ternary = true; (*eastnode_p)->data.ter.got_colon = true; ADD_VALUE_IF_MISSING(_(EXP_VAL_COLON)); assert((*eastnode_p)->children != NULL); assert((*eastnode_p)->children->next == NULL); kvi_push(ast_stack, &(*eastnode_p)->children->next); break; } else if (eastnode_type == kExprNodeUnknownFigure) { SELECT_FIGURE_BRACE_TYPE(*eastnode_p, DictLiteral, Dict); break; } else if (eastnode_type == kExprNodeDictLiteral) { break; } else if (eastnode_type == kExprNodeSubscript) { is_subscript = true; // can_be_ternary = false; assert(!is_ternary); break; } else if (eastnode_type == kExprNodeColon) { goto viml_pexpr_parse_invalid_colon; } else if (eastnode_lvl >= kEOpLvlTernaryValue) { // Do nothing } else if (eastnode_lvl >= kEOpLvlComma) { can_be_ternary = false; } else { goto viml_pexpr_parse_invalid_colon; } if (i == kv_size(ast_stack) - 1) { goto viml_pexpr_parse_invalid_colon; } } if (is_subscript) { assert(kv_size(ast_stack) > 1); // Colon immediately following subscript start: it is empty subscript // part like a[:2]. if (want_node == kENodeValue && (*kv_Z(ast_stack, 1))->type == kExprNodeSubscript) { NEW_NODE_WITH_CUR_POS(*top_node_p, kExprNodeMissing); (*top_node_p)->len = 0; want_node = kENodeOperator; } else { ADD_VALUE_IF_MISSING(_(EXP_VAL_COLON)); } NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeColon); ADD_OP_NODE(cur_node); HL_CUR_TOKEN(SubscriptColon); } else { goto viml_pexpr_parse_valid_colon; viml_pexpr_parse_invalid_colon: ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E15: Colon outside of dictionary or ternary operator: %.*s")); viml_pexpr_parse_valid_colon: ADD_VALUE_IF_MISSING(_(EXP_VAL_COLON)); if (is_ternary) { HL_CUR_TOKEN(TernaryColon); } else { NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeColon); ADD_OP_NODE(cur_node); HL_CUR_TOKEN(Colon); } } want_node = kENodeValue; break; } #undef EXP_VAL_COLON case kExprLexBracket: if (cur_token.data.brc.closing) { ExprASTNode **new_top_node_p = NULL; // Always drop the topmost value: // // 1. When want_node != kENodeValue topmost item on stack is // a *finished* left operand, which may as well be "{@a}" which // needs not be finished again. // 2. Otherwise it is pointing to NULL what nobody wants. kv_drop(ast_stack, 1); if (!kv_size(ast_stack)) { NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeListLiteral); cur_node->len = 0; if (want_node != kENodeValue) { cur_node->children = *top_node_p; } *top_node_p = cur_node; goto viml_pexpr_parse_bracket_closing_error; } if (want_node == kENodeValue) { // It is OK to want value if // // 1. It is empty list literal, in which case top node will be // ListLiteral. // 2. It is list literal with trailing comma, in which case top node // will be that comma. // 3. It is subscript with colon, but without one of the values: // e.g. "a[:]", "a[1:]", top node will be colon in this case. if ((*kv_last(ast_stack))->type != kExprNodeListLiteral && (*kv_last(ast_stack))->type != kExprNodeComma && (*kv_last(ast_stack))->type != kExprNodeColon) { ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E15: Expected value, got closing bracket: %.*s")); } } do { new_top_node_p = kv_pop(ast_stack); } while (kv_size(ast_stack) && (new_top_node_p == NULL || ((*new_top_node_p)->type != kExprNodeListLiteral && (*new_top_node_p)->type != kExprNodeSubscript))); ExprASTNode *new_top_node = *new_top_node_p; switch (new_top_node->type) { case kExprNodeListLiteral: if (pt_is_assignment(cur_pt) && new_top_node->children == NULL) { ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E475: Unable to assign to empty list: %.*s")); } HL_CUR_TOKEN(List); break; case kExprNodeSubscript: HL_CUR_TOKEN(SubscriptBracket); break; default: viml_pexpr_parse_bracket_closing_error: assert(!kv_size(ast_stack)); ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E15: Unexpected closing figure brace: %.*s")); HL_CUR_TOKEN(List); break; } kvi_push(ast_stack, new_top_node_p); want_node = kENodeOperator; if (kv_size(ast_stack) <= asgn_level) { assert(kv_size(ast_stack) == asgn_level); asgn_level = 0; if (cur_pt == kEPTAssignment) { assert(ast.err.msg); } else if (cur_pt == kEPTExpr && kv_size(pt_stack) > 1 && pt_is_assignment(kv_Z(pt_stack, 1))) { kv_drop(pt_stack, 1); } } if (cur_pt == kEPTSingleAssignment && kv_size(ast_stack) == 1) { kv_drop(pt_stack, 1); } } else { if (want_node == kENodeValue) { // Value means list literal or list assignment. NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeListLiteral); *top_node_p = cur_node; kvi_push(ast_stack, &cur_node->children); want_node = kENodeValue; if (cur_pt == kEPTAssignment) { // Additional assignment parse type allows to easily forbid nested // lists. kvi_push(pt_stack, kEPTSingleAssignment); } else if (cur_pt == kEPTSingleAssignment) { ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E475: Nested lists not allowed when assigning: %.*s")); } HL_CUR_TOKEN(List); } else { // Operator means subscript, also in assignment. But in assignment // subscript may be pretty much any expression, so need to push // kEPTExpr. if (prev_token.type == kExprLexSpacing) { OP_MISSING; } NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeSubscript); ADD_OP_NODE(cur_node); HL_CUR_TOKEN(SubscriptBracket); if (pt_is_assignment(cur_pt)) { assert(want_node == kENodeValue); // Subtract 1 for NULL at top. asgn_level = kv_size(ast_stack) - 1; kvi_push(pt_stack, kEPTExpr); } } } break; case kExprLexFigureBrace: if (cur_token.data.brc.closing) { ExprASTNode **new_top_node_p = NULL; // Always drop the topmost value: // // 1. When want_node != kENodeValue topmost item on stack is // a *finished* left operand, which may as well be "{@a}" which // needs not be finished again. // 2. Otherwise it is pointing to NULL what nobody wants. kv_drop(ast_stack, 1); if (!kv_size(ast_stack)) { NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeUnknownFigure); cur_node->data.fig.type_guesses.allow_lambda = false; cur_node->data.fig.type_guesses.allow_dict = false; cur_node->data.fig.type_guesses.allow_ident = false; cur_node->len = 0; if (want_node != kENodeValue) { cur_node->children = *top_node_p; } *top_node_p = cur_node; new_top_node_p = top_node_p; goto viml_pexpr_parse_figure_brace_closing_error; } if (want_node == kENodeValue) { if ((*kv_last(ast_stack))->type != kExprNodeUnknownFigure && (*kv_last(ast_stack))->type != kExprNodeComma) { // kv_last being UnknownFigure may occur for empty dictionary // literal, while Comma is expected in case of non-empty one. ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E15: Expected value, got closing figure brace: %.*s")); } } do { new_top_node_p = kv_pop(ast_stack); } while (kv_size(ast_stack) && (new_top_node_p == NULL || ((*new_top_node_p)->type != kExprNodeUnknownFigure && (*new_top_node_p)->type != kExprNodeDictLiteral && ((*new_top_node_p)->type != kExprNodeCurlyBracesIdentifier) && (*new_top_node_p)->type != kExprNodeLambda))); ExprASTNode *new_top_node = *new_top_node_p; switch (new_top_node->type) { case kExprNodeUnknownFigure: if (new_top_node->children == NULL) { // No children of curly braces node indicates empty dictionary. assert(want_node == kENodeValue); assert(new_top_node->data.fig.type_guesses.allow_dict); SELECT_FIGURE_BRACE_TYPE(new_top_node, DictLiteral, Dict); HL_CUR_TOKEN(Dict); } else if (new_top_node->data.fig.type_guesses.allow_ident) { SELECT_FIGURE_BRACE_TYPE(new_top_node, CurlyBracesIdentifier, Curly); HL_CUR_TOKEN(Curly); } else { // If by this time type of the node has not already been // guessed, but it definitely is not a curly braces name then // it is invalid for sure. ERROR_FROM_NODE_AND_MSG(new_top_node, _("E15: Don't know what figure brace means: %.*s")); if (pstate->colors) { // Will reset to NvimInvalidFigureBrace. kv_A(*pstate->colors, new_top_node->data.fig.opening_hl_idx).group = ( HL(FigureBrace)); } HL_CUR_TOKEN(FigureBrace); } break; case kExprNodeDictLiteral: HL_CUR_TOKEN(Dict); break; case kExprNodeCurlyBracesIdentifier: HL_CUR_TOKEN(Curly); break; case kExprNodeLambda: HL_CUR_TOKEN(Lambda); break; default: viml_pexpr_parse_figure_brace_closing_error: assert(!kv_size(ast_stack)); ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E15: Unexpected closing figure brace: %.*s")); HL_CUR_TOKEN(FigureBrace); break; } kvi_push(ast_stack, new_top_node_p); want_node = kENodeOperator; if (kv_size(ast_stack) <= asgn_level) { assert(kv_size(ast_stack) == asgn_level); if (cur_pt == kEPTExpr && kv_size(pt_stack) > 1 && pt_is_assignment(kv_Z(pt_stack, 1))) { kv_drop(pt_stack, 1); asgn_level = 0; } } } else { if (want_node == kENodeValue) { HL_CUR_TOKEN(FigureBrace); // Value: may be any of lambda, dictionary literal and curly braces // name. // Though if we are in an assignment this may only be a curly braces // name. if (pt_is_assignment(cur_pt)) { NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeCurlyBracesIdentifier); cur_node->data.fig.type_guesses.allow_lambda = false; cur_node->data.fig.type_guesses.allow_dict = false; cur_node->data.fig.type_guesses.allow_ident = true; kvi_push(pt_stack, kEPTExpr); } else { NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeUnknownFigure); cur_node->data.fig.type_guesses.allow_lambda = true; cur_node->data.fig.type_guesses.allow_dict = true; cur_node->data.fig.type_guesses.allow_ident = true; } if (pstate->colors) { cur_node->data.fig.opening_hl_idx = kv_size(*pstate->colors) - 1; } *top_node_p = cur_node; kvi_push(ast_stack, &cur_node->children); kvi_push(pt_stack, kEPTLambdaArguments); lambda_node = cur_node; } else { // uncrustify:off ADD_IDENT(do { NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeCurlyBracesIdentifier); cur_node->data.fig.opening_hl_idx = kv_size(*pstate->colors); cur_node->data.fig.type_guesses.allow_lambda = false; cur_node->data.fig.type_guesses.allow_dict = false; cur_node->data.fig.type_guesses.allow_ident = true; kvi_push(ast_stack, &cur_node->children); if (pt_is_assignment(cur_pt)) { kvi_push(pt_stack, kEPTExpr); } want_node = kENodeValue; } while (0), Curly); // uncrustify:on } if (pt_is_assignment(cur_pt) && !pt_is_assignment(kv_last(pt_stack))) { assert(want_node == kENodeValue); // Subtract 1 for NULL at top. asgn_level = kv_size(ast_stack) - 1; } } break; case kExprLexArrow: if (cur_pt == kEPTLambdaArguments) { kv_drop(pt_stack, 1); assert(kv_size(pt_stack)); if (want_node == kENodeValue) { // Wanting value means trailing comma and NULL at the top of the // stack. kv_drop(ast_stack, 1); } assert(kv_size(ast_stack) >= 1); while ((*kv_last(ast_stack))->type != kExprNodeLambda && (*kv_last(ast_stack))->type != kExprNodeUnknownFigure) { kv_drop(ast_stack, 1); } assert((*kv_last(ast_stack)) == lambda_node); SELECT_FIGURE_BRACE_TYPE(lambda_node, Lambda, Lambda); NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeArrow); if (lambda_node->children == NULL) { assert(want_node == kENodeValue); lambda_node->children = cur_node; kvi_push(ast_stack, &lambda_node->children); } else { assert(lambda_node->children->next == NULL); lambda_node->children->next = cur_node; kvi_push(ast_stack, &lambda_node->children->next); } kvi_push(ast_stack, &cur_node->children); lambda_node = NULL; } else { // Only first branch is valid. ADD_VALUE_IF_MISSING(_("E15: Unexpected arrow: %.*s")); ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E15: Arrow outside of lambda: %.*s")); NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeArrow); ADD_OP_NODE(cur_node); } want_node = kENodeValue; HL_CUR_TOKEN(Arrow); break; case kExprLexPlainIdentifier: { const ExprVarScope scope = (cur_token.type == kExprLexInvalid ? kExprVarScopeMissing : cur_token.data.var.scope); if (want_node == kENodeValue) { want_node = kENodeOperator; NEW_NODE_WITH_CUR_POS(cur_node, (node_is_key ? kExprNodePlainKey : kExprNodePlainIdentifier)); cur_node->data.var.scope = scope; const size_t scope_shift = (scope == kExprVarScopeMissing ? 0 : 2); cur_node->data.var.ident = (pline.data + cur_token.start.col + scope_shift); cur_node->data.var.ident_len = cur_token.len - scope_shift; *top_node_p = cur_node; if (scope_shift) { assert(!node_is_key); viml_parser_highlight(pstate, cur_token.start, 1, HL(IdentifierScope)); viml_parser_highlight(pstate, shifted_pos(cur_token.start, 1), 1, HL(IdentifierScopeDelimiter)); } viml_parser_highlight(pstate, shifted_pos(cur_token.start, scope_shift), cur_token.len - scope_shift, (node_is_key ? HL(IdentifierKey) : HL(IdentifierName))); } else { if (scope == kExprVarScopeMissing) { // uncrustify:off ADD_IDENT(do { NEW_NODE_WITH_CUR_POS(cur_node, kExprNodePlainIdentifier); cur_node->data.var.scope = scope; cur_node->data.var.ident = pline.data + cur_token.start.col; cur_node->data.var.ident_len = cur_token.len; want_node = kENodeOperator; } while (0), IdentifierName); // uncrustify:on } else { OP_MISSING; } } break; } case kExprLexNumber: if (want_node != kENodeValue) { OP_MISSING; } if (node_is_key) { NEW_NODE_WITH_CUR_POS(cur_node, kExprNodePlainKey); cur_node->data.var.ident = pline.data + cur_token.start.col; cur_node->data.var.ident_len = cur_token.len; HL_CUR_TOKEN(IdentifierKey); } else if (cur_token.data.num.is_float) { NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeFloat); cur_node->data.flt.value = cur_token.data.num.val.floating; HL_CUR_TOKEN(Float); } else { NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeInteger); cur_node->data.num.value = cur_token.data.num.val.integer; const uint8_t prefix_length = base_to_prefix_length[ cur_token.data.num.base]; viml_parser_highlight(pstate, cur_token.start, prefix_length, HL(NumberPrefix)); viml_parser_highlight(pstate, shifted_pos(cur_token.start, prefix_length), cur_token.len - prefix_length, HL(Number)); } want_node = kENodeOperator; *top_node_p = cur_node; break; case kExprLexDot: ADD_VALUE_IF_MISSING(_("E15: Unexpected dot: %.*s")); if (prev_token.type == kExprLexSpacing) { if (cur_pt == kEPTAssignment) { ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E15: Cannot concatenate in assignments: %.*s")); } NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeConcat); HL_CUR_TOKEN(Concat); } else { NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeConcatOrSubscript); HL_CUR_TOKEN(ConcatOrSubscript); } ADD_OP_NODE(cur_node); break; case kExprLexParenthesis: if (cur_token.data.brc.closing) { if (want_node == kENodeValue) { if (kv_size(ast_stack) > 1) { const ExprASTNode *const prev_top_node = *kv_Z(ast_stack, 1); if (prev_top_node->type == kExprNodeCall) { // Function call without arguments, this is not an error. // But further code does not expect NULL nodes. kv_drop(ast_stack, 1); goto viml_pexpr_parse_no_paren_closing_error; } } ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E15: Expected value, got parenthesis: %.*s")); NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeMissing); cur_node->len = 0; *top_node_p = cur_node; } else { // Always drop the topmost value: when want_node != kENodeValue // topmost item on stack is a *finished* left operand, which may as // well be "(@a)" which needs not be finished again. kv_drop(ast_stack, 1); } viml_pexpr_parse_no_paren_closing_error: {} ExprASTNode **new_top_node_p = NULL; while (kv_size(ast_stack) && (new_top_node_p == NULL || ((*new_top_node_p)->type != kExprNodeNested && (*new_top_node_p)->type != kExprNodeCall))) { new_top_node_p = kv_pop(ast_stack); } if (new_top_node_p != NULL && ((*new_top_node_p)->type == kExprNodeNested || (*new_top_node_p)->type == kExprNodeCall)) { if ((*new_top_node_p)->type == kExprNodeNested) { HL_CUR_TOKEN(NestingParenthesis); } else { HL_CUR_TOKEN(CallingParenthesis); } } else { // “Always drop the topmost value” branch has got rid of the single // value stack had, so there is nothing known to enclose. Correct // this. if (new_top_node_p == NULL) { new_top_node_p = top_node_p; } ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E15: Unexpected closing parenthesis: %.*s")); HL_CUR_TOKEN(NestingParenthesis); cur_node = NEW_NODE(kExprNodeNested); cur_node->start = cur_token.start; cur_node->len = 0; // Unexpected closing parenthesis, assume that it was wanted to // enclose everything in (). cur_node->children = *new_top_node_p; *new_top_node_p = cur_node; assert(cur_node->next == NULL); } kvi_push(ast_stack, new_top_node_p); want_node = kENodeOperator; } else { switch (want_node) { case kENodeValue: NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeNested); *top_node_p = cur_node; kvi_push(ast_stack, &cur_node->children); HL_CUR_TOKEN(NestingParenthesis); break; case kENodeOperator: if (prev_token.type == kExprLexSpacing) { // For some reason "function (args)" is a function call, but // "(funcref) (args)" is not. AFAIR this somehow involves // compatibility and Bram was commenting that this is // intentionally inconsistent and he is not very happy with the // situation himself. if ((*top_node_p)->type != kExprNodePlainIdentifier && (*top_node_p)->type != kExprNodeComplexIdentifier && (*top_node_p)->type != kExprNodeCurlyBracesIdentifier) { OP_MISSING; } } NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeCall); ADD_OP_NODE(cur_node); HL_CUR_TOKEN(CallingParenthesis); break; } want_node = kENodeValue; } break; case kExprLexQuestion: { ADD_VALUE_IF_MISSING(_("E15: Expected value, got question mark: %.*s")); NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeTernary); ADD_OP_NODE(cur_node); HL_CUR_TOKEN(Ternary); ExprASTNode *ter_val_node; NEW_NODE_WITH_CUR_POS(ter_val_node, kExprNodeTernaryValue); ter_val_node->data.ter.got_colon = false; assert(cur_node->children != NULL); assert(cur_node->children->next == NULL); assert(kv_last(ast_stack) == &cur_node->children->next); *kv_last(ast_stack) = ter_val_node; kvi_push(ast_stack, &ter_val_node->children); break; } case kExprLexDoubleQuotedString: case kExprLexSingleQuotedString: { const bool is_double = (tok_type == kExprLexDoubleQuotedString); if (!cur_token.data.str.closed) { // It is weird, but Vim has two identical errors messages with // different error numbers: "E114: Missing quote" and // "E115: Missing quote". ERROR_FROM_TOKEN_AND_MSG(cur_token, (is_double ? _("E114: Missing double quote: %.*s") : _("E115: Missing single quote: %.*s"))); } if (want_node == kENodeOperator) { OP_MISSING; } NEW_NODE_WITH_CUR_POS(cur_node, (is_double ? kExprNodeDoubleQuotedString : kExprNodeSingleQuotedString)); *top_node_p = cur_node; parse_quoted_string(pstate, cur_node, cur_token, &ast_stack, is_invalid); want_node = kENodeOperator; break; } case kExprLexAssignment: if (cur_pt == kEPTAssignment) { kv_drop(pt_stack, 1); } else if (cur_pt == kEPTSingleAssignment) { kv_drop(pt_stack, 2); ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E475: Expected closing bracket to end list assignment " "lvalue: %.*s")); } else { ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E15: Misplaced assignment: %.*s")); } assert(kv_size(pt_stack)); assert(kv_last(pt_stack) == kEPTExpr); ADD_VALUE_IF_MISSING(_("E15: Unexpected assignment: %.*s")); NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeAssignment); cur_node->data.ass.type = cur_token.data.ass.type; switch (cur_token.data.ass.type) { #define HL_ASGN(asgn, hl) \ case kExprAsgn##asgn: { HL_CUR_TOKEN(hl); break; } HL_ASGN(Plain, PlainAssignment) HL_ASGN(Add, AssignmentWithAddition) HL_ASGN(Subtract, AssignmentWithSubtraction) HL_ASGN(Concat, AssignmentWithConcatenation) #undef HL_ASGN } ADD_OP_NODE(cur_node); break; } viml_pexpr_parse_cycle_end: prev_token = cur_token; highlighted_prev_spacing = false; viml_parser_advance(pstate, cur_token.len); } while (true); viml_pexpr_parse_end: assert(kv_size(pt_stack)); assert(kv_size(ast_stack)); if (want_node == kENodeValue // Blacklist some parse type entries as their presence means better error // message in the other branch. && kv_last(pt_stack) != kEPTLambdaArguments) { east_set_error(pstate, &ast.err, _("E15: Expected value, got EOC: %.*s"), pstate->pos); } else if (kv_size(ast_stack) != 1) { // Something may be wrong, check whether it really is. // Pointer to ast.root must never be dropped, so “!= 1” is expected to be // the same as “> 1”. assert(kv_size(ast_stack)); // Topmost stack item must be a *finished* value, so it must not be // analyzed. E.g. it may contain an already finished nested expression. kv_drop(ast_stack, 1); while (ast.err.msg == NULL && kv_size(ast_stack)) { const ExprASTNode *const cur_node = (*kv_pop(ast_stack)); // This should only happen when want_node == kENodeValue. assert(cur_node != NULL); // TODO(ZyX-I): Rehighlight as invalid? switch (cur_node->type) { case kExprNodeOpMissing: case kExprNodeMissing: // Error should’ve been already reported. break; case kExprNodeCall: east_set_error(pstate, &ast.err, _("E116: Missing closing parenthesis for function call: %.*s"), cur_node->start); break; case kExprNodeNested: east_set_error(pstate, &ast.err, _("E110: Missing closing parenthesis for nested expression" ": %.*s"), cur_node->start); break; case kExprNodeListLiteral: // For whatever reason "[1" yields "E696: Missing comma in list" error // in Vim while "[1," yields E697. east_set_error(pstate, &ast.err, _("E697: Missing end of List ']': %.*s"), cur_node->start); break; case kExprNodeDictLiteral: // Same problem like with list literal with E722 (missing comma) vs // E723, but additionally just "{" yields only E15. east_set_error(pstate, &ast.err, _("E723: Missing end of Dictionary '}': %.*s"), cur_node->start); break; case kExprNodeUnknownFigure: east_set_error(pstate, &ast.err, _("E15: Missing closing figure brace: %.*s"), cur_node->start); break; case kExprNodeLambda: east_set_error(pstate, &ast.err, _("E15: Missing closing figure brace for lambda: %.*s"), cur_node->start); break; case kExprNodeCurlyBracesIdentifier: // Until trailing "}" it is impossible to distinguish curly braces // identifier and dictionary, so it must not appear in the stack like // this. abort(); case kExprNodeInteger: case kExprNodeFloat: case kExprNodeSingleQuotedString: case kExprNodeDoubleQuotedString: case kExprNodeOption: case kExprNodeEnvironment: case kExprNodeRegister: case kExprNodePlainIdentifier: case kExprNodePlainKey: // These are plain values and not containers, for them it should only // be possible to show up in the topmost stack element, but it was // unconditionally popped at the start. abort(); case kExprNodeComma: case kExprNodeColon: case kExprNodeArrow: // It is actually only valid inside something else, but everything // where one of the above is valid requires to be closed and thus is // to be caught later. break; case kExprNodeSubscript: case kExprNodeConcatOrSubscript: case kExprNodeComplexIdentifier: case kExprNodeAssignment: case kExprNodeMod: case kExprNodeDivision: case kExprNodeMultiplication: case kExprNodeNot: case kExprNodeAnd: case kExprNodeOr: case kExprNodeConcat: case kExprNodeComparison: case kExprNodeUnaryMinus: case kExprNodeUnaryPlus: case kExprNodeBinaryMinus: case kExprNodeTernary: case kExprNodeBinaryPlus: // It is OK to see these in the stack. break; case kExprNodeTernaryValue: if (!cur_node->data.ter.got_colon) { // Actually Vim throws E109 in more cases. east_set_error(pstate, &ast.err, _("E109: Missing ':' after '?': %.*s"), cur_node->start); } break; } } } kvi_destroy(ast_stack); return ast; } #undef NEW_NODE #undef HL