diff options
Diffstat (limited to 'src/nvim/viml/parser')
| -rw-r--r-- | src/nvim/viml/parser/expressions.c | 3093 | ||||
| -rw-r--r-- | src/nvim/viml/parser/expressions.h | 389 | ||||
| -rw-r--r-- | src/nvim/viml/parser/parser.c | 16 | ||||
| -rw-r--r-- | src/nvim/viml/parser/parser.h | 244 | 
4 files changed, 3742 insertions, 0 deletions
| diff --git a/src/nvim/viml/parser/expressions.c b/src/nvim/viml/parser/expressions.c new file mode 100644 index 0000000000..1a7e55c11e --- /dev/null +++ b/src/nvim/viml/parser/expressions.c @@ -0,0 +1,3093 @@ +// This is an open source non-commercial project. Dear PVS-Studio, please check +// it. PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com + +/// VimL expression parser + +// Planned incompatibilities (to be included into vim_diff.txt when this parser +// will be an actual part of VimL evaluation process): +// +// 1. Expressions are first fully parsed and only then executed.  This means +//    that while ":echo [system('touch abc')" will create file "abc" in Vim and +//    only then raise syntax error regarding missing comma in list in Neovim +//    trying to execute that will immediately raise syntax error regarding +//    missing list end without actually executing anything. +// 2. Expressions are first fully parsed, without considering any runtime +//    information.  This means things like that "d.a" does not change its +//    meaning depending on type of "d" (or whether Vim is currently executing or +//    skipping).  For compatibility reasons the dot thus may either be “concat +//    or subscript” operator or just “concat” operator. +// 3. Expressions parser is aware whether it is called for :echo or <C-r>=. +//    This means that while "<C-r>=1 | 2<CR>" is equivalent to "<C-r>=1<CR>" +//    because "| 2" part is left to be treated as a command separator and then +//    ignored in Neovim it is an error. +// 4. Expressions parser has generally better error reporting.  But for +//    compatibility reasons most errors have error code E15 while error messages +//    are significantly different from Vim’s E15.  Also some error codes were +//    retired because of being harder to emulate or because of them being +//    a result of differences in parsing process: e.g. 
with ":echo {a, b}" Vim +//    will attempt to parse expression as lambda, fail, check whether it is +//    a curly-braces-name, fail again, and evaluate that as a dictionary, giving +//    error regarding undefined variable "a" (or about missing colon).  Neovim +//    will not try to evaluate anything here: comma right after an argument name +//    means that expression may not be anything, but lambda, so the resulting +//    error message will never be about missing variable or colon: it will be +//    about missing arrow (or a continuation of argument list). +// 5. Failing to parse expression always gives exactly one error message: no +//    more stack of error messages like > +// +//        :echo [1, +//        E697: Missing end of List ']': +//        E15: Invalid expression: [1, +// +// <  , just exactly one E697 message. +// 6. Some expressions involving calling parenthesis which are treated +//    separately by Vim even when not separated by spaces are treated as one +//    expression by Neovim: e.g. ":echo (1)(1)" will yield runtime error after +//    failing to call "1", while Vim will echo "1 1". Reasoning is the same: +//    type of what is in the first expression is generally not known when +//    parsing, so to have separate expressions like this separate them with +//    spaces. +// 7. 'isident' no longer applies to environment variables, they always include +//    ASCII alphanumeric characters and underscore and nothing except this. + +#include <stdbool.h> +#include <stddef.h> +#include <assert.h> +#include <string.h> + +#include "nvim/vim.h" +#include "nvim/memory.h" +#include "nvim/types.h" +#include "nvim/charset.h" +#include "nvim/ascii.h" +#include "nvim/assert.h" +#include "nvim/lib/kvec.h" +#include "nvim/eval/typval.h" + +#include "nvim/viml/parser/expressions.h" +#include "nvim/viml/parser/parser.h" + +#define vim_str2nr(s, ...) 
vim_str2nr((const char_u *)(s), __VA_ARGS__) + +typedef kvec_withinit_t(ExprASTNode **, 16) ExprASTStack; + +/// Which nodes may be wanted +typedef enum { +  /// Operators: function call, subscripts, binary operators, … +  /// +  /// For unrestricted expressions. +  kENodeOperator, +  /// Values: literals, variables, nested expressions, unary operators. +  /// +  /// For unrestricted expressions as well, implies that top item in AST stack +  /// points to NULL. +  kENodeValue, +} ExprASTWantedNode; + +/// Parse type: what is being parsed currently +typedef enum { +  /// Parsing regular VimL expression +  kEPTExpr = 0, +  /// Parsing lambda arguments +  /// +  /// Just like parsing function arguments, but it is valid to be ended with an +  /// arrow only. +  kEPTLambdaArguments, +  /// Assignment: parsing for :let +  kEPTAssignment, +  /// Single assignment: used when lists are not allowed (i.e. when nesting) +  kEPTSingleAssignment, +} ExprASTParseType; + +typedef kvec_withinit_t(ExprASTParseType, 4) ExprASTParseTypeStack; + +/// Operator priority level +typedef enum { +  kEOpLvlInvalid = 0, +  kEOpLvlComplexIdentifier, +  kEOpLvlParens, +  kEOpLvlAssignment, +  kEOpLvlArrow, +  kEOpLvlComma, +  kEOpLvlColon, +  kEOpLvlTernaryValue, +  kEOpLvlTernary, +  kEOpLvlOr, +  kEOpLvlAnd, +  kEOpLvlComparison, +  kEOpLvlAddition,  ///< Addition, subtraction and concatenation. +  kEOpLvlMultiplication,  ///< Multiplication, division and modulo. +  kEOpLvlUnary,  ///< Unary operations: not, minus, plus. +  kEOpLvlSubscript,  ///< Subscripts. +  kEOpLvlValue,  ///< Values: literals, variables, nested expressions, … +} ExprOpLvl; + +/// Operator associativity +typedef enum { +  kEOpAssNo= 'n',  ///< Not associative / not applicable. +  kEOpAssLeft = 'l',  ///< Left associativity. +  kEOpAssRight = 'r',  ///< Right associativity. 
+} ExprOpAssociativity; + +#ifdef INCLUDE_GENERATED_DECLARATIONS +# include "viml/parser/expressions.c.generated.h" +#endif + +/// Character used as a separator in autoload function/variable names. +#define AUTOLOAD_CHAR '#' + +/// Scale number by a given factor +/// +/// Used to apply exponent to a number. Idea taken from uClibc. +/// +/// @param[in]  num  Number to scale. Does not bother doing anything if it is +///                  zero. +/// @param[in]  base  Base, should be 10 since non-decimal floating-point +///                   numbers are not supported. +/// @param[in]  exponent  Exponent to scale by. +/// @param[in]  exponent_negative  True if exponent is negative. +static inline float_T scale_number(const float_T num, +                                   const uint8_t base, +                                   const uvarnumber_T exponent, +                                   const bool exponent_negative) +  FUNC_ATTR_ALWAYS_INLINE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_CONST +{ +  if (num == 0 || exponent == 0) { +    return num; +  } +  assert(base); +  uvarnumber_T exp = exponent; +  float_T p_base = (float_T)base; +  float_T ret = num; +  while (exp) { +    if (exp & 1) { +      if (exponent_negative) { +        ret /= p_base; +      } else { +        ret *= p_base; +      } +    } +    exp >>= 1; +    p_base *= p_base; +  } +  return ret; +} + +/// Get next token for the VimL expression input +/// +/// @param  pstate  Parser state. +/// @param[in]  flags  Flags, @see LexExprFlags. +/// +/// @return Next token. 
+LexExprToken viml_pexpr_next_token(ParserState *const pstate, const int flags) +  FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL +{ +  LexExprToken ret = { +    .type = kExprLexInvalid, +    .start = pstate->pos, +  }; +  ParserLine pline; +  if (!viml_parser_get_remaining_line(pstate, &pline)) { +    ret.type = kExprLexEOC; +    return ret; +  } +  if (pline.size <= 0) { +    ret.len = 0; +    ret.type = kExprLexEOC; +    goto viml_pexpr_next_token_adv_return; +  } +  ret.len = 1; +  const uint8_t schar = (uint8_t)pline.data[0]; +#define GET_CCS(ret, pline) \ +  do { \ +    if (ret.len < pline.size \ +        && strchr("?#", pline.data[ret.len]) != NULL) { \ +      ret.data.cmp.ccs = \ +          (ExprCaseCompareStrategy)pline.data[ret.len]; \ +      ret.len++; \ +    } else { \ +      ret.data.cmp.ccs = kCCStrategyUseOption; \ +    } \ +  } while (0) +  switch (schar) { +    // Paired brackets. +#define BRACKET(typ, opning, clsing) \ +    case opning: \ +    case clsing: { \ +      ret.type = typ; \ +      ret.data.brc.closing = (schar == clsing); \ +      break; \ +    } +    BRACKET(kExprLexParenthesis, '(', ')') +    BRACKET(kExprLexBracket, '[', ']') +    BRACKET(kExprLexFigureBrace, '{', '}') +#undef BRACKET + +    // Single character tokens without data. +#define CHAR(typ, ch) \ +    case ch: { \ +      ret.type = typ; \ +      break; \ +    } +    CHAR(kExprLexQuestion, '?') +    CHAR(kExprLexColon, ':') +    CHAR(kExprLexComma, ',') +#undef CHAR + +    // Multiplication/division/modulo. 
+#define MUL(mul_type, ch) \ +    case ch: { \ +      ret.type = kExprLexMultiplication; \ +      ret.data.mul.type = mul_type; \ +      break; \ +    } +    MUL(kExprLexMulMul, '*') +    MUL(kExprLexMulDiv, '/') +    MUL(kExprLexMulMod, '%') +#undef MUL + +#define CHARREG(typ, cond) \ +    do { \ +      ret.type = typ; \ +      for (; (ret.len < pline.size \ +              && cond(pline.data[ret.len])) \ +           ; ret.len++) { \ +      } \ +    } while (0) + +    // Whitespace. +    case ' ': +    case TAB: { +      CHARREG(kExprLexSpacing, ascii_iswhite); +      break; +    } + +    // Control character, except for NUL, NL and TAB. +    case Ctrl_A: case Ctrl_B: case Ctrl_C: case Ctrl_D: case Ctrl_E: +    case Ctrl_F: case Ctrl_G: case Ctrl_H: + +    case Ctrl_K: case Ctrl_L: case Ctrl_M: case Ctrl_N: case Ctrl_O: +    case Ctrl_P: case Ctrl_Q: case Ctrl_R: case Ctrl_S: case Ctrl_T: +    case Ctrl_U: case Ctrl_V: case Ctrl_W: case Ctrl_X: case Ctrl_Y: +    case Ctrl_Z: { +#define ISCTRL(schar) (schar < ' ') +      CHARREG(kExprLexInvalid, ISCTRL); +      ret.data.err.type = kExprLexSpacing; +      ret.data.err.msg = +          _("E15: Invalid control character present in input: %.*s"); +      break; +#undef ISCTRL +    } + +    // Number. +    case '0': case '1': case '2': case '3': case '4': case '5': case '6': +    case '7': case '8': case '9': { +      ret.data.num.is_float = false; +      ret.data.num.base = 10; +      size_t frac_start = 0; +      size_t exp_start = 0; +      size_t frac_end = 0; +      bool exp_negative = false; +      CHARREG(kExprLexNumber, ascii_isdigit); +      if (flags & kELFlagAllowFloat) { +        const LexExprToken non_float_ret = ret; +        if (pline.size > ret.len + 1 +            && pline.data[ret.len] == '.' 
+            && ascii_isdigit(pline.data[ret.len + 1])) { +          ret.len++; +          frac_start = ret.len; +          frac_end = ret.len; +          ret.data.num.is_float = true; +          for (; ret.len < pline.size && ascii_isdigit(pline.data[ret.len]) +               ; ret.len++) { +            // A small optimization: trailing zeroes in fractional part do not +            // add anything to significand, so it is useless to include them in +            // frac_end. +            if (pline.data[ret.len] != '0') { +              frac_end = ret.len + 1; +            } +          } +          if (pline.size > ret.len + 1 +              && (pline.data[ret.len] == 'e' +                  || pline.data[ret.len] == 'E') +              && ((pline.size > ret.len + 2 +                   && (pline.data[ret.len + 1] == '+' +                       || pline.data[ret.len + 1] == '-') +                   && ascii_isdigit(pline.data[ret.len + 2])) +                  || ascii_isdigit(pline.data[ret.len + 1]))) { +            ret.len++; +            if (pline.data[ret.len] == '+' +                || (exp_negative = (pline.data[ret.len] == '-'))) { +              ret.len++; +            } +            exp_start = ret.len; +            CHARREG(kExprLexNumber, ascii_isdigit); +          } +        } +        if (pline.size > ret.len +            && (pline.data[ret.len] == '.' +                || ASCII_ISALPHA(pline.data[ret.len]))) { +          ret = non_float_ret; +        } +      } +      // TODO(ZyX-I): detect overflows +      if (ret.data.num.is_float) { +        // Vim used to use string2float here which in turn uses strtod(). There +        // are two problems with this approach: +        // 1. strtod() is locale-dependent. Not sure how it is worked around so +        //    that I do not see relevant bugs, but it still does not look like +        //    a good idea. +        // 2. strtod() does not accept length argument. 
+        // +        // The below variant of parsing floats was recognized as acceptable +        // because it is basically how uClibc does the thing: it generates +        // a number ignoring decimal point (but recording its position), then +        // uses recorded position to scale number down when processing exponent. +        float_T significand_part = 0; +        uvarnumber_T exp_part = 0; +        const size_t frac_size = (size_t)(frac_end - frac_start); +        for (size_t i = 0; i < frac_end; i++) { +          if (i == frac_start - 1) { +            continue; +          } +          significand_part = significand_part * 10 + (pline.data[i] - '0'); +        } +        if (exp_start) { +          vim_str2nr(pline.data + exp_start, NULL, NULL, 0, NULL, &exp_part, +                     (int)(ret.len - exp_start)); +        } +        if (exp_negative) { +          exp_part += frac_size; +        } else { +          if (exp_part < frac_size) { +            exp_negative = true; +            exp_part = frac_size - exp_part; +          } else { +            exp_part -= frac_size; +          } +        } +        ret.data.num.val.floating = scale_number(significand_part, 10, exp_part, +                                                 exp_negative); +      } else { +        int len; +        int prep; +        vim_str2nr(pline.data, &prep, &len, STR2NR_ALL, NULL, +                   &ret.data.num.val.integer, (int)pline.size); +        ret.len = (size_t)len; +        const uint8_t bases[] = { +          [0] = 10, +          ['0'] = 8, +          ['x'] = 16, ['X'] = 16, +          ['b'] = 2, ['B'] = 2, +        }; +        ret.data.num.base = bases[prep]; +      } +      break; +    } + +#define ISWORD_OR_AUTOLOAD(x) \ +      (ascii_isident(x) || (x) == AUTOLOAD_CHAR) + +    // Environment variable. +    case '$': { +      CHARREG(kExprLexEnv, ascii_isident); +      break; +    } + +    // Normal variable/function name. 
+    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': +    case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': +    case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': +    case 'v': case 'w': case 'x': case 'y': case 'z': +    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': +    case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': +    case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': +    case 'V': case 'W': case 'X': case 'Y': case 'Z': +    case '_': { +      ret.data.var.scope = 0; +      ret.data.var.autoload = false; +      CHARREG(kExprLexPlainIdentifier, ascii_isident); +      // "is" and "isnot" operators. +      if (!(flags & kELFlagIsNotCmp) +          && ((ret.len == 2 && memcmp(pline.data, "is", 2) == 0) +              || (ret.len == 5 && memcmp(pline.data, "isnot", 5) == 0))) { +        ret.type = kExprLexComparison; +        ret.data.cmp.type = kExprCmpIdentical; +        ret.data.cmp.inv = (ret.len == 5); +        GET_CCS(ret, pline); +      // Scope: `s:`, etc. +      } else if (ret.len == 1 +                 && pline.size > 1 +                 && memchr(EXPR_VAR_SCOPE_LIST, schar, +                           sizeof(EXPR_VAR_SCOPE_LIST)) != NULL +                 && pline.data[ret.len] == ':' +                 && !(flags & kELFlagForbidScope)) { +        ret.len++; +        ret.data.var.scope = (ExprVarScope)schar; +        CHARREG(kExprLexPlainIdentifier, ISWORD_OR_AUTOLOAD); +        ret.data.var.autoload = ( +            memchr(pline.data + 2, AUTOLOAD_CHAR, ret.len - 2) +            != NULL); +      // Previous CHARREG stopped at autoload character in order to make it +      // possible to detect `is#`. Continue now with autoload characters +      // included. +      // +      // Warning: there is ambiguity for the lexer: `is#Foo(1)` is a call of +      // function `is#Foo()`, `1is#Foo(1)` is a comparison `1 is# Foo(1)`. 
This +      // needs to be resolved on the higher level where context is available. +      } else if (pline.size > ret.len +                 && pline.data[ret.len] == AUTOLOAD_CHAR) { +        ret.data.var.autoload = true; +        CHARREG(kExprLexPlainIdentifier, ISWORD_OR_AUTOLOAD); +      } +      break; +    } + +#undef ISWORD_OR_AUTOLOAD +#undef CHARREG + +    // Option. +    case '&': { +#define OPTNAMEMISS(ret) \ +        do { \ +          ret.type = kExprLexInvalid; \ +          ret.data.err.type = kExprLexOption; \ +          ret.data.err.msg = _("E112: Option name missing: %.*s"); \ +        } while (0) +      if (pline.size > 1 && pline.data[1] == '&') { +        ret.type = kExprLexAnd; +        ret.len++; +        break; +      } +      if (pline.size == 1 || !ASCII_ISALPHA(pline.data[1])) { +        OPTNAMEMISS(ret); +        break; +      } +      ret.type = kExprLexOption; +      if (pline.size > 2 +          && pline.data[2] == ':' +          && memchr(EXPR_OPT_SCOPE_LIST, pline.data[1], +                    sizeof(EXPR_OPT_SCOPE_LIST)) != NULL) { +        ret.len += 2; +        ret.data.opt.scope = (ExprOptScope)pline.data[1]; +        ret.data.opt.name = pline.data + 3; +      } else { +        ret.data.opt.scope = kExprOptScopeUnspecified; +        ret.data.opt.name = pline.data + 1; +      } +      const char *p = ret.data.opt.name; +      const char *const e = pline.data + pline.size; +      if (e - p >= 4 && p[0] == 't' && p[1] == '_') { +        ret.data.opt.len = 4; +        ret.len += 4; +      } else { +        for (; p < e && ASCII_ISALPHA(*p); p++) { +        } +        ret.data.opt.len = (size_t)(p - ret.data.opt.name); +        if (ret.data.opt.len == 0) { +          OPTNAMEMISS(ret); +        } else { +          ret.len += ret.data.opt.len; +        } +      } +      break; +#undef OPTNAMEMISS +    } + +    // Register. 
+    case '@': { +      ret.type = kExprLexRegister; +      if (pline.size > 1) { +        ret.len++; +        ret.data.reg.name = (uint8_t)pline.data[1]; +      } else { +        ret.data.reg.name = -1; +      } +      break; +    } + +    // Single quoted string. +    case '\'': { +      ret.type = kExprLexSingleQuotedString; +      ret.data.str.closed = false; +      for (; ret.len < pline.size && !ret.data.str.closed; ret.len++) { +        if (pline.data[ret.len] == '\'') { +          if (ret.len + 1 < pline.size && pline.data[ret.len + 1] == '\'') { +            ret.len++; +          } else { +            ret.data.str.closed = true; +          } +        } +      } +      break; +    } + +    // Double quoted string. +    case '"': { +      ret.type = kExprLexDoubleQuotedString; +      ret.data.str.closed = false; +      for (; ret.len < pline.size && !ret.data.str.closed; ret.len++) { +        if (pline.data[ret.len] == '\\') { +          if (ret.len + 1 < pline.size) { +            ret.len++; +          } +        } else if (pline.data[ret.len] == '"') { +          ret.data.str.closed = true; +        } +      } +      break; +    } + +    // Unary not, (un)equality and regex (not) match comparison operators. +    case '!': +    case '=': { +      if (pline.size == 1) { +        ret.type = (schar == '!' ? 
kExprLexNot : kExprLexAssignment); +        ret.data.ass.type = kExprAsgnPlain; +        break; +      } +      ret.type = kExprLexComparison; +      ret.data.cmp.inv = (schar == '!'); +      if (pline.data[1] == '=') { +        ret.data.cmp.type = kExprCmpEqual; +        ret.len++; +      } else if (pline.data[1] == '~') { +        ret.data.cmp.type = kExprCmpMatches; +        ret.len++; +      } else if (schar == '!') { +        ret.type = kExprLexNot; +      } else { +        ret.type = kExprLexAssignment; +        ret.data.ass.type = kExprAsgnPlain; +      } +      GET_CCS(ret, pline); +      break; +    } + +    // Less/greater [or equal to] comparison operators. +    case '>': +    case '<': { +      ret.type = kExprLexComparison; +      const bool haseqsign = (pline.size > 1 && pline.data[1] == '='); +      if (haseqsign) { +        ret.len++; +      } +      GET_CCS(ret, pline); +      ret.data.cmp.inv = (schar == '<'); +      ret.data.cmp.type = ((ret.data.cmp.inv ^ haseqsign) +                           ? kExprCmpGreaterOrEqual +                           : kExprCmpGreater); +      break; +    } + +    // Minus sign, arrow from lambdas or augmented assignment. +    case '-': { +      if (pline.size > 1 && pline.data[1] == '>') { +        ret.len++; +        ret.type = kExprLexArrow; +      } else if (pline.size > 1 && pline.data[1] == '=') { +        ret.len++; +        ret.type = kExprLexAssignment; +        ret.data.ass.type = kExprAsgnSubtract; +      } else { +        ret.type = kExprLexMinus; +      } +      break; +    } + +    // Sign or augmented assignment. 
+#define CHAR_OR_ASSIGN(ch, ch_type, ass_type) \ +    case ch: { \ +      if (pline.size > 1 && pline.data[1] == '=') { \ +        ret.len++; \ +        ret.type = kExprLexAssignment; \ +        ret.data.ass.type = ass_type; \ +      } else { \ +        ret.type = ch_type; \ +      } \ +      break; \ +    } +    CHAR_OR_ASSIGN('+', kExprLexPlus, kExprAsgnAdd) +    CHAR_OR_ASSIGN('.', kExprLexDot, kExprAsgnConcat) +#undef CHAR_OR_ASSIGN + +    // Expression end because Ex command ended. +    case NUL: +    case NL: { +      if (flags & kELFlagForbidEOC) { +        ret.type = kExprLexInvalid; +        ret.data.err.msg = _("E15: Unexpected EOC character: %.*s"); +        ret.data.err.type = kExprLexSpacing; +      } else { +        ret.type = kExprLexEOC; +      } +      break; +    } + +    case '|': { +      if (pline.size >= 2 && pline.data[ret.len] == '|') { +        // "||" is or. +        ret.len++; +        ret.type = kExprLexOr; +      } else if (flags & kELFlagForbidEOC) { +        // Note: `<C-r>=1 | 2<CR>` actually yields 1 in Vim without any +        //       errors. This will be changed here. +        ret.type = kExprLexInvalid; +        ret.data.err.msg = _("E15: Unexpected EOC character: %.*s"); +        ret.data.err.type = kExprLexOr; +      } else { +        ret.type = kExprLexEOC; +      } +      break; +    } + +    // Everything else is not valid. 
+    default: { +      ret.len = (size_t)utfc_ptr2len_len((const char_u *)pline.data, +                                         (int)pline.size); +      ret.type = kExprLexInvalid; +      ret.data.err.type = kExprLexPlainIdentifier; +      ret.data.err.msg = _("E15: Unidentified character: %.*s"); +      break; +    } +  } +#undef GET_CCS +viml_pexpr_next_token_adv_return: +  if (!(flags & kELFlagPeek)) { +    viml_parser_advance(pstate, ret.len); +  } +  return ret; +} + +static const char *const eltkn_type_tab[] = { +  [kExprLexInvalid] = "Invalid", +  [kExprLexMissing] = "Missing", +  [kExprLexSpacing] = "Spacing", +  [kExprLexEOC] = "EOC", + +  [kExprLexQuestion] = "Question", +  [kExprLexColon] = "Colon", +  [kExprLexOr] = "Or", +  [kExprLexAnd] = "And", +  [kExprLexComparison] = "Comparison", +  [kExprLexPlus] = "Plus", +  [kExprLexMinus] = "Minus", +  [kExprLexDot] = "Dot", +  [kExprLexMultiplication] = "Multiplication", + +  [kExprLexNot] = "Not", + +  [kExprLexNumber] = "Number", +  [kExprLexSingleQuotedString] = "SingleQuotedString", +  [kExprLexDoubleQuotedString] = "DoubleQuotedString", +  [kExprLexOption] = "Option", +  [kExprLexRegister] = "Register", +  [kExprLexEnv] = "Env", +  [kExprLexPlainIdentifier] = "PlainIdentifier", + +  [kExprLexBracket] = "Bracket", +  [kExprLexFigureBrace] = "FigureBrace", +  [kExprLexParenthesis] = "Parenthesis", +  [kExprLexComma] = "Comma", +  [kExprLexArrow] = "Arrow", +  [kExprLexAssignment] = "Assignment", +}; + +const char *const eltkn_cmp_type_tab[] = { +  [kExprCmpEqual] = "Equal", +  [kExprCmpMatches] = "Matches", +  [kExprCmpGreater] = "Greater", +  [kExprCmpGreaterOrEqual] = "GreaterOrEqual", +  [kExprCmpIdentical] = "Identical", +}; + +const char *const expr_asgn_type_tab[] = { +  [kExprAsgnPlain] = "Plain", +  [kExprAsgnAdd] = "Add", +  [kExprAsgnSubtract] = "Subtract", +  [kExprAsgnConcat] = "Concat", +}; + +const char *const ccs_tab[] = { +  [kCCStrategyUseOption] = "UseOption", +  [kCCStrategyMatchCase] = 
"MatchCase", +  [kCCStrategyIgnoreCase] = "IgnoreCase", +}; + +static const char *const eltkn_mul_type_tab[] = { +  [kExprLexMulMul] = "Mul", +  [kExprLexMulDiv] = "Div", +  [kExprLexMulMod] = "Mod", +}; + +static const char *const eltkn_opt_scope_tab[] = { +  [kExprOptScopeUnspecified] = "Unspecified", +  [kExprOptScopeGlobal] = "Global", +  [kExprOptScopeLocal] = "Local", +}; + +/// Represent token as a string +/// +/// Intended for testing and debugging purposes. +/// +/// @param[in]  pstate  Parser state, needed to get token string from it. May be +///                     NULL, in which case in place of obtaining part of the +///                     string represented by token only token length is +///                     returned. +/// @param[in]  token  Token to represent. +/// @param[out]  ret_size  Return string size, for cases like NULs inside +///                        a string. May be NULL. +/// +/// @return Token represented in a string form, in a static buffer (overwritten +///         on each call). +const char *viml_pexpr_repr_token(const ParserState *const pstate, +                                  const LexExprToken token, +                                  size_t *const ret_size) +  FUNC_ATTR_WARN_UNUSED_RESULT +{ +  static char ret[1024]; +  char *p = ret; +  const char *const e = &ret[1024] - 1; +#define ADDSTR(...) \ +  do { \ +    p += snprintf(p, (size_t)(sizeof(ret) - (size_t)(p - ret)), __VA_ARGS__); \ +    if (p >= e) { \ +      goto viml_pexpr_repr_token_end; \ +    } \ +  } while (0) +  ADDSTR("%zu:%zu:%s", token.start.line, token.start.col, +         eltkn_type_tab[token.type]); +  switch (token.type) { +#define TKNARGS(tkn_type, ...) 
\ +    case tkn_type: { \ +      ADDSTR(__VA_ARGS__); \ +      break; \ +    } +    TKNARGS(kExprLexComparison, "(type=%s,ccs=%s,inv=%i)", +            eltkn_cmp_type_tab[token.data.cmp.type], +            ccs_tab[token.data.cmp.ccs], +            (int)token.data.cmp.inv) +    TKNARGS(kExprLexMultiplication, "(type=%s)", +            eltkn_mul_type_tab[token.data.mul.type]) +    TKNARGS(kExprLexAssignment, "(type=%s)", +            expr_asgn_type_tab[token.data.ass.type]) +    TKNARGS(kExprLexRegister, "(name=%s)", intchar2str(token.data.reg.name)) +    case kExprLexDoubleQuotedString: +    TKNARGS(kExprLexSingleQuotedString, "(closed=%i)", +            (int)token.data.str.closed) +    TKNARGS(kExprLexOption, "(scope=%s,name=%.*s)", +            eltkn_opt_scope_tab[token.data.opt.scope], +            (int)token.data.opt.len, token.data.opt.name) +    TKNARGS(kExprLexPlainIdentifier, "(scope=%s,autoload=%i)", +            intchar2str((int)token.data.var.scope), +            (int)token.data.var.autoload) +    TKNARGS(kExprLexNumber, "(is_float=%i,base=%i,val=%lg)", +            (int)token.data.num.is_float, +            (int)token.data.num.base, +            (double)(token.data.num.is_float +                     ? (double)token.data.num.val.floating +                     : (double)token.data.num.val.integer)) +    TKNARGS(kExprLexInvalid, "(msg=%s)", token.data.err.msg) +    default: { +      // No additional arguments. 
+      break; +    } +#undef TKNARGS +  } +  if (pstate == NULL) { +    ADDSTR("::%zu", token.len); +  } else { +    *p++ = ':'; +    memmove( +        p, &pstate->reader.lines.items[token.start.line].data[token.start.col], +        token.len); +    p += token.len; +    *p = NUL; +  } +#undef ADDSTR +viml_pexpr_repr_token_end: +  if (ret_size != NULL) { +    *ret_size = (size_t)(p - ret); +  } +  return ret; +} + +const char *const east_node_type_tab[] = { +  [kExprNodeMissing] = "Missing", +  [kExprNodeOpMissing] = "OpMissing", +  [kExprNodeTernary] = "Ternary", +  [kExprNodeTernaryValue] = "TernaryValue", +  [kExprNodeRegister] = "Register", +  [kExprNodeSubscript] = "Subscript", +  [kExprNodeListLiteral] = "ListLiteral", +  [kExprNodeUnaryPlus] = "UnaryPlus", +  [kExprNodeBinaryPlus] = "BinaryPlus", +  [kExprNodeNested] = "Nested", +  [kExprNodeCall] = "Call", +  [kExprNodePlainIdentifier] = "PlainIdentifier", +  [kExprNodePlainKey] = "PlainKey", +  [kExprNodeComplexIdentifier] = "ComplexIdentifier", +  [kExprNodeUnknownFigure] = "UnknownFigure", +  [kExprNodeLambda] = "Lambda", +  [kExprNodeDictLiteral] = "DictLiteral", +  [kExprNodeCurlyBracesIdentifier] = "CurlyBracesIdentifier", +  [kExprNodeComma] = "Comma", +  [kExprNodeColon] = "Colon", +  [kExprNodeArrow] = "Arrow", +  [kExprNodeComparison] = "Comparison", +  [kExprNodeConcat] = "Concat", +  [kExprNodeConcatOrSubscript] = "ConcatOrSubscript", +  [kExprNodeInteger] = "Integer", +  [kExprNodeFloat] = "Float", +  [kExprNodeSingleQuotedString] = "SingleQuotedString", +  [kExprNodeDoubleQuotedString] = "DoubleQuotedString", +  [kExprNodeOr] = "Or", +  [kExprNodeAnd] = "And", +  [kExprNodeUnaryMinus] = "UnaryMinus", +  [kExprNodeBinaryMinus] = "BinaryMinus", +  [kExprNodeNot] = "Not", +  [kExprNodeMultiplication] = "Multiplication", +  [kExprNodeDivision] = "Division", +  [kExprNodeMod] = "Mod", +  [kExprNodeOption] = "Option", +  [kExprNodeEnvironment] = "Environment", +  [kExprNodeAssignment] = "Assignment", 
+}; + +/// Represent `int` character as a string +/// +/// Converts +/// - ASCII digits into '{digit}' +/// - ASCII printable characters into a single-character strings +/// - everything else to numbers. +/// +/// @param[in]  ch  Character to convert. +/// +/// @return Converted string, stored in a static buffer (overriden after each +///         call). +static const char *intchar2str(const int ch) +  FUNC_ATTR_WARN_UNUSED_RESULT +{ +  static char buf[sizeof(int) * 3 + 1]; +  if (' ' <= ch && ch < 0x7f) { +    if (ascii_isdigit(ch)) { +      buf[0] = '\''; +      buf[1] = (char)ch; +      buf[2] = '\''; +      buf[3] = NUL; +    } else { +      buf[0] = (char)ch; +      buf[1] = NUL; +    } +  } else { +    snprintf(buf, sizeof(buf), "%i", ch); +  } +  return buf; +} + +#ifdef UNIT_TESTING +#include <stdio.h> + +REAL_FATTR_UNUSED +static inline void viml_pexpr_debug_print_ast_node( +    const ExprASTNode *const *const eastnode_p, +    const char *const prefix) +{ +  if (*eastnode_p == NULL) { +    fprintf(stderr, "%s %p : NULL\n", prefix, (void *)eastnode_p); +  } else { +    fprintf(stderr, "%s %p : %p : %s : %zu:%zu:%zu\n", +            prefix, (void *)eastnode_p, (void *)(*eastnode_p), +            east_node_type_tab[(*eastnode_p)->type], (*eastnode_p)->start.line, +            (*eastnode_p)->start.col, (*eastnode_p)->len); +  } +} + +REAL_FATTR_UNUSED +static inline void viml_pexpr_debug_print_ast_stack( +    const ExprASTStack *const ast_stack, +    const char *const msg) +  FUNC_ATTR_NONNULL_ALL FUNC_ATTR_ALWAYS_INLINE +{ +  fprintf(stderr, "\n%sstack: %zu:\n", msg, kv_size(*ast_stack)); +  for (size_t i = 0; i < kv_size(*ast_stack); i++) { +    viml_pexpr_debug_print_ast_node( +        (const ExprASTNode *const *)kv_A(*ast_stack, i), +        "-"); +  } +} + +REAL_FATTR_UNUSED +static inline void viml_pexpr_debug_print_token( +    const ParserState *const pstate, const LexExprToken token) +  FUNC_ATTR_ALWAYS_INLINE +{ +  fprintf(stderr, "\ntkn: %s\n", 
viml_pexpr_repr_token(pstate, token, NULL)); +} +#define PSTACK(msg) \ +    viml_pexpr_debug_print_ast_stack(&ast_stack, #msg) +#define PSTACK_P(msg) \ +    viml_pexpr_debug_print_ast_stack(ast_stack, #msg) +#define PNODE_P(eastnode_p, msg) \ +    viml_pexpr_debug_print_ast_node((const ExprASTNode *const *)eastnode_p, \ +                                    (#msg)) +#define PTOKEN(tkn) \ +    viml_pexpr_debug_print_token(pstate, tkn) +#endif + +const uint8_t node_maxchildren[] = { +  [kExprNodeMissing] = 0, +  [kExprNodeOpMissing] = 2, +  [kExprNodeTernary] = 2, +  [kExprNodeTernaryValue] = 2, +  [kExprNodeRegister] = 0, +  [kExprNodeSubscript] = 2, +  [kExprNodeListLiteral] = 1, +  [kExprNodeUnaryPlus] = 1, +  [kExprNodeBinaryPlus] = 2, +  [kExprNodeNested] = 1, +  [kExprNodeCall] = 2, +  [kExprNodePlainIdentifier] = 0, +  [kExprNodePlainKey] = 0, +  [kExprNodeComplexIdentifier] = 2, +  [kExprNodeUnknownFigure] = 1, +  [kExprNodeLambda] = 2, +  [kExprNodeDictLiteral] = 1, +  [kExprNodeCurlyBracesIdentifier] = 1, +  [kExprNodeComma] = 2, +  [kExprNodeColon] = 2, +  [kExprNodeArrow] = 2, +  [kExprNodeComparison] = 2, +  [kExprNodeConcat] = 2, +  [kExprNodeConcatOrSubscript] = 2, +  [kExprNodeInteger] = 0, +  [kExprNodeFloat] = 0, +  [kExprNodeSingleQuotedString] = 0, +  [kExprNodeDoubleQuotedString] = 0, +  [kExprNodeOr] = 2, +  [kExprNodeAnd] = 2, +  [kExprNodeUnaryMinus] = 1, +  [kExprNodeBinaryMinus] = 2, +  [kExprNodeNot] = 1, +  [kExprNodeMultiplication] = 2, +  [kExprNodeDivision] = 2, +  [kExprNodeMod] = 2, +  [kExprNodeOption] = 0, +  [kExprNodeEnvironment] = 0, +  [kExprNodeAssignment] = 2, +}; + +/// Free memory occupied by AST +/// +/// @param  ast  AST stack to free. +void viml_pexpr_free_ast(ExprAST ast) +{ +  ExprASTStack ast_stack; +  kvi_init(ast_stack); +  kvi_push(ast_stack, &ast.root); +  while (kv_size(ast_stack)) { +    ExprASTNode **const cur_node = kv_last(ast_stack); +#ifndef NDEBUG +    // Explicitly check for AST recursiveness. 
+    for (size_t i = 0 ; i < kv_size(ast_stack) - 1 ; i++) { +      assert(*kv_A(ast_stack, i) != *cur_node); +    } +#endif +    if (*cur_node == NULL) { +      assert(kv_size(ast_stack) == 1); +      kv_drop(ast_stack, 1); +    } else if ((*cur_node)->children != NULL) { +#ifndef NDEBUG +      const uint8_t maxchildren = node_maxchildren[(*cur_node)->type]; +      assert(maxchildren > 0); +      assert(maxchildren <= 2); +      assert(maxchildren == 1 +             ? (*cur_node)->children->next == NULL +             : ((*cur_node)->children->next == NULL +                || (*cur_node)->children->next->next == NULL)); +#endif +      kvi_push(ast_stack, &(*cur_node)->children); +    } else if ((*cur_node)->next != NULL) { +      kvi_push(ast_stack, &(*cur_node)->next); +    } else if (*cur_node != NULL) { +      kv_drop(ast_stack, 1); +      switch ((*cur_node)->type) { +        case kExprNodeDoubleQuotedString: +        case kExprNodeSingleQuotedString: { +          xfree((*cur_node)->data.str.value); +          break; +        } +        case kExprNodeMissing: +        case kExprNodeOpMissing: +        case kExprNodeTernary: +        case kExprNodeTernaryValue: +        case kExprNodeRegister: +        case kExprNodeSubscript: +        case kExprNodeListLiteral: +        case kExprNodeUnaryPlus: +        case kExprNodeBinaryPlus: +        case kExprNodeNested: +        case kExprNodeCall: +        case kExprNodePlainIdentifier: +        case kExprNodePlainKey: +        case kExprNodeComplexIdentifier: +        case kExprNodeUnknownFigure: +        case kExprNodeLambda: +        case kExprNodeDictLiteral: +        case kExprNodeCurlyBracesIdentifier: +        case kExprNodeAssignment: +        case kExprNodeComma: +        case kExprNodeColon: +        case kExprNodeArrow: +        case kExprNodeComparison: +        case kExprNodeConcat: +        case kExprNodeConcatOrSubscript: +        case kExprNodeInteger: +        case kExprNodeFloat: +        case 
kExprNodeOr: +        case kExprNodeAnd: +        case kExprNodeUnaryMinus: +        case kExprNodeBinaryMinus: +        case kExprNodeNot: +        case kExprNodeMultiplication: +        case kExprNodeDivision: +        case kExprNodeMod: +        case kExprNodeOption: +        case kExprNodeEnvironment: { +          break; +        } +      } +      xfree(*cur_node); +      *cur_node = NULL; +    } +  } +  kvi_destroy(ast_stack); +} + +// Binary operator precedence and associativity: +// +// Operator | Precedence | Associativity +// ---------+------------+----------------- +// ||       | 2          | left +// &&       | 3          | left +// cmp*     | 4          | not associative +// + - .    | 5          | left +// * / %    | 6          | left +// +// * comparison operators: +// +// == ==# ==?  != !=# !=? +// =~ =~# =~?  !~ !~# !~? +//  >  >#  >?  <= <=# <=? +//  <  <#  <?  >= >=# >=? +// is is# is?  isnot isnot# isnot? + +/// Allocate a new node and set some of the values +/// +/// @param[in]  type  Node type to allocate. +/// @param[in]  level  Node level to allocate +static inline ExprASTNode *viml_pexpr_new_node(const ExprASTNodeType type) +  FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_MALLOC +{ +  ExprASTNode *ret = xmalloc(sizeof(*ret)); +  ret->type = type; +  ret->children = NULL; +  ret->next = NULL; +  return ret; +} + +static struct { +  ExprOpLvl lvl; +  ExprOpAssociativity ass; +} node_type_to_node_props[] = { +  [kExprNodeMissing] = { kEOpLvlInvalid, kEOpAssNo, }, +  [kExprNodeOpMissing] = { kEOpLvlMultiplication, kEOpAssNo }, + +  [kExprNodeNested] = { kEOpLvlParens, kEOpAssNo }, +  // Note: below nodes are kEOpLvlSubscript for “binary operator” itself, but +  //       kEOpLvlParens when it comes to inside the parenthesis. 
+  [kExprNodeCall] = { kEOpLvlParens, kEOpAssNo }, +  [kExprNodeSubscript] = { kEOpLvlParens, kEOpAssNo }, + +  [kExprNodeUnknownFigure] = { kEOpLvlParens, kEOpAssLeft }, +  [kExprNodeLambda] = { kEOpLvlParens, kEOpAssNo }, +  [kExprNodeDictLiteral] = { kEOpLvlParens, kEOpAssNo }, +  [kExprNodeListLiteral] = { kEOpLvlParens, kEOpAssNo }, + +  [kExprNodeArrow] = { kEOpLvlArrow, kEOpAssNo }, + +  // Right associativity for comma because this means easier access to arguments +  // list, etc: for "[a, b, c, d]" you can access "a" in one step if it is +  // represented as "list(comma(a, comma(b, comma(c, d))))" then if it is +  // "list(comma(comma(comma(a, b), c), d))" in which case you will need to +  // traverse all three comma() structures. And with comma operator (including +  // actual comma operator from C which is not present in VimL) nobody cares +  // about associativity, only about order of execution. +  [kExprNodeComma] = { kEOpLvlComma, kEOpAssRight }, + +  // Colons are not eligible for chaining, so nobody cares about associativity. 
+  [kExprNodeColon] = { kEOpLvlColon, kEOpAssNo }, + +  [kExprNodeTernary] = { kEOpLvlTernary, kEOpAssRight }, + +  [kExprNodeOr] = { kEOpLvlOr, kEOpAssLeft }, + +  [kExprNodeAnd] = { kEOpLvlAnd, kEOpAssLeft }, + +  [kExprNodeTernaryValue] = { kEOpLvlTernaryValue, kEOpAssRight }, + +  [kExprNodeComparison] = { kEOpLvlComparison, kEOpAssRight }, + +  [kExprNodeBinaryPlus] = { kEOpLvlAddition, kEOpAssLeft }, +  [kExprNodeBinaryMinus] = { kEOpLvlAddition, kEOpAssLeft }, +  [kExprNodeConcat] = { kEOpLvlAddition, kEOpAssLeft }, + +  [kExprNodeMultiplication] = { kEOpLvlMultiplication, kEOpAssLeft }, +  [kExprNodeDivision] = { kEOpLvlMultiplication, kEOpAssLeft }, +  [kExprNodeMod] = { kEOpLvlMultiplication, kEOpAssLeft }, + +  [kExprNodeUnaryPlus] = { kEOpLvlUnary, kEOpAssNo }, +  [kExprNodeUnaryMinus] = { kEOpLvlUnary, kEOpAssNo }, +  [kExprNodeNot] = { kEOpLvlUnary, kEOpAssNo }, + +  [kExprNodeConcatOrSubscript] = { kEOpLvlSubscript, kEOpAssLeft }, + +  [kExprNodeCurlyBracesIdentifier] = { kEOpLvlComplexIdentifier, kEOpAssLeft }, + +  [kExprNodeAssignment] = { kEOpLvlAssignment, kEOpAssLeft }, + +  [kExprNodeComplexIdentifier] = { kEOpLvlValue, kEOpAssLeft }, + +  [kExprNodePlainIdentifier] = { kEOpLvlValue, kEOpAssNo }, +  [kExprNodePlainKey] = { kEOpLvlValue, kEOpAssNo }, +  [kExprNodeRegister] = { kEOpLvlValue, kEOpAssNo }, +  [kExprNodeInteger] = { kEOpLvlValue, kEOpAssNo }, +  [kExprNodeFloat] = { kEOpLvlValue, kEOpAssNo }, +  [kExprNodeDoubleQuotedString] = { kEOpLvlValue, kEOpAssNo }, +  [kExprNodeSingleQuotedString] = { kEOpLvlValue, kEOpAssNo }, +  [kExprNodeOption] = { kEOpLvlValue, kEOpAssNo }, +  [kExprNodeEnvironment] = { kEOpLvlValue, kEOpAssNo }, +}; + +/// Get AST node priority level +/// +/// Used primary to reduce line length, so keep the name short. +/// +/// @param[in]  node  Node to get priority for. +/// +/// @return Node priority level. 
+static inline ExprOpLvl node_lvl(const ExprASTNode node) +  FUNC_ATTR_ALWAYS_INLINE FUNC_ATTR_CONST FUNC_ATTR_WARN_UNUSED_RESULT +{ +  return node_type_to_node_props[node.type].lvl; +} + +/// Get AST node associativity, to be used for operator nodes primary +/// +/// Used primary to reduce line length, so keep the name short. +/// +/// @param[in]  node  Node to get priority for. +/// +/// @return Node associativity. +static inline ExprOpAssociativity node_ass(const ExprASTNode node) +  FUNC_ATTR_ALWAYS_INLINE FUNC_ATTR_CONST FUNC_ATTR_WARN_UNUSED_RESULT +{ +  return node_type_to_node_props[node.type].ass; +} + +/// Handle binary operator +/// +/// This function is responsible for handling priority levels as well. +/// +/// @param[in]  pstate  Parser state, used for error reporting. +/// @param  ast_stack  AST stack. May be popped of some values and will +///                    definitely receive new ones. +/// @param  bop_node  New node to handle. +/// @param[out]  want_node_p  New value of want_node. +/// @param[out]  ast_err  Location where error is saved, if any. +/// +/// @return True if no errors occurred, false otherwise. +static bool viml_pexpr_handle_bop(const ParserState *const pstate, +                                  ExprASTStack *const ast_stack, +                                  ExprASTNode *const bop_node, +                                  ExprASTWantedNode *const want_node_p, +                                  ExprASTError *const ast_err) +  FUNC_ATTR_NONNULL_ALL +{ +  bool ret = true; +  ExprASTNode **top_node_p = NULL; +  ExprASTNode *top_node; +  ExprOpLvl top_node_lvl; +  ExprOpAssociativity top_node_ass; +  assert(kv_size(*ast_stack)); +  const ExprOpLvl bop_node_lvl = ((bop_node->type == kExprNodeCall +                                   || bop_node->type == kExprNodeSubscript) +                                  ? 
kEOpLvlSubscript +                                  : node_lvl(*bop_node)); +#ifndef NDEBUG +  const ExprOpAssociativity bop_node_ass = ( +      (bop_node->type == kExprNodeCall +       || bop_node->type == kExprNodeSubscript) +      ? kEOpAssLeft +      : node_ass(*bop_node)); +#endif +  do { +    ExprASTNode **new_top_node_p = kv_last(*ast_stack); +    ExprASTNode *new_top_node = *new_top_node_p; +    assert(new_top_node != NULL); +    const ExprOpLvl new_top_node_lvl = node_lvl(*new_top_node); +    const ExprOpAssociativity new_top_node_ass = node_ass(*new_top_node); +    assert(bop_node_lvl != new_top_node_lvl +           || bop_node_ass == new_top_node_ass); +    if (top_node_p != NULL +        && ((bop_node_lvl > new_top_node_lvl +             || (bop_node_lvl == new_top_node_lvl +                 && new_top_node_ass == kEOpAssNo)))) { +      break; +    } +    kv_drop(*ast_stack, 1); +    top_node_p = new_top_node_p; +    top_node = new_top_node; +    top_node_lvl = new_top_node_lvl; +    top_node_ass = new_top_node_ass; +    if (bop_node_lvl == top_node_lvl && top_node_ass == kEOpAssRight) { +      break; +    } +  } while (kv_size(*ast_stack)); +  if (top_node_ass == kEOpAssLeft || top_node_lvl != bop_node_lvl) { +    // outer(op(x,y)) -> outer(new_op(op(x,y),*)) +    // +    // Before: top_node_p = outer(*), points to op(x,y) +    //         Other stack elements unknown +    // +    // After: top_node_p = outer(*), points to new_op(op(x,y)) +    //        &bop_node->children->next = new_op(op(x,y),*), points to NULL +    *top_node_p = bop_node; +    bop_node->children = top_node; +    assert(bop_node->children->next == NULL); +    kvi_push(*ast_stack, top_node_p); +    kvi_push(*ast_stack, &bop_node->children->next); +  } else { +    assert(top_node_lvl == bop_node_lvl && top_node_ass == kEOpAssRight); +    assert(top_node->children != NULL && top_node->children->next != NULL); +    // outer(op(x,y)) -> outer(op(x,new_op(y,*))) +    // +    // Before: 
top_node_p = outer(*), points to op(x,y) +    //         Other stack elements unknown +    // +    // After: top_node_p = outer(*), points to op(x,new_op(y)) +    //        &top_node->children->next = op(x,*), points to new_op(y) +    //        &bop_node->children->next = new_op(y,*), points to NULL +    bop_node->children = top_node->children->next; +    top_node->children->next = bop_node; +    assert(bop_node->children->next == NULL); +    kvi_push(*ast_stack, top_node_p); +    kvi_push(*ast_stack, &top_node->children->next); +    kvi_push(*ast_stack, &bop_node->children->next); +    // TODO(ZyX-I): Make this not error, but treat like Python does +    if (bop_node->type == kExprNodeComparison) { +      east_set_error(pstate, ast_err, +                     _("E15: Operator is not associative: %.*s"), +                     bop_node->start); +      ret = false; +    } +  } +  *want_node_p = kENodeValue; +  return ret; +} + +/// ParserPosition literal based on ParserPosition pos with columns shifted +/// +/// Function does not check whether resulting position is valid. +/// +/// @param[in]  pos  Position to shift. +/// @param[in]  shift  Number of bytes to shift. +/// +/// @return Shifted position. +static inline ParserPosition shifted_pos(const ParserPosition pos, +                                         const size_t shift) +  FUNC_ATTR_CONST FUNC_ATTR_ALWAYS_INLINE FUNC_ATTR_WARN_UNUSED_RESULT +{ +  return (ParserPosition) { .line = pos.line, .col = pos.col + shift }; +} + +/// ParserPosition literal based on ParserPosition pos with specified column +/// +/// Function does not check whether remaining position is valid. +/// +/// @param[in]  pos  Position to adjust. +/// @param[in]  new_col  New column. +/// +/// @return Shifted position. 
+static inline ParserPosition recol_pos(const ParserPosition pos, +                                       const size_t new_col) +  FUNC_ATTR_CONST FUNC_ATTR_ALWAYS_INLINE FUNC_ATTR_WARN_UNUSED_RESULT +{ +  return (ParserPosition) { .line = pos.line, .col = new_col }; +} + +/// Get highlight group name +#define HL(g) (is_invalid ? "NvimInvalid" #g : "Nvim" #g) + +/// Highlight current token with the given group +#define HL_CUR_TOKEN(g) \ +        viml_parser_highlight(pstate, cur_token.start, cur_token.len, \ +                              HL(g)) + +/// Allocate new node, saving some values +#define NEW_NODE(type) \ +    viml_pexpr_new_node(type) + +/// Set position of the given node to position from the given token +/// +/// @param  cur_node  Node to modify. +/// @param  cur_token  Token to set position from. +#define POS_FROM_TOKEN(cur_node, cur_token) \ +    do { \ +      (cur_node)->start = cur_token.start; \ +      (cur_node)->len = cur_token.len; \ +    } while (0) + +/// Allocate new node and set its position from the current token +/// +/// If previous token happened to contain spacing then it will be included. +/// +/// @param  cur_node  Variable to save allocated node to. +/// @param  typ  Node type. +#define NEW_NODE_WITH_CUR_POS(cur_node, typ) \ +    do { \ +      (cur_node) = NEW_NODE(typ); \ +      POS_FROM_TOKEN((cur_node), cur_token); \ +      if (prev_token.type == kExprLexSpacing) { \ +        (cur_node)->start = prev_token.start; \ +        (cur_node)->len += prev_token.len; \ +      } \ +    } while (0) + +/// Check whether it is possible to have next expression after current +/// +/// For :echo: `:echo @a @a` is a valid expression. `:echo (@a @a)` is not. +#define MAY_HAVE_NEXT_EXPR \ +    (kv_size(ast_stack) == 1) + +/// Add operator node +/// +/// @param[in]  cur_node  Node to add. 
#define ADD_OP_NODE(cur_node) \
    (is_invalid |= !viml_pexpr_handle_bop(pstate, &ast_stack, (cur_node), \
                                          &want_node, &ast.err))

/// Record missing operator: for things like
///
///     :echo @a @a
///
/// (allowed) or
///
///     :echo (@a @a)
///
/// (parsed as OpMissing(@a, @a)).
///
/// This macro never falls through: it always leaves via goto, either to
/// viml_pexpr_parse_end or to viml_pexpr_parse_process_token. Uses local
/// variables flags, ast_stack, top_node_p, cur_token and cur_node.
#define OP_MISSING \
    do { \
      if ((flags & kExprFlagsMulti) && MAY_HAVE_NEXT_EXPR) { \
        /* Multiple expressions allowed, return without calling */ \
        /* viml_parser_advance(). */ \
        goto viml_pexpr_parse_end; \
      } else { \
        assert(*top_node_p != NULL); \
        ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E15: Missing operator: %.*s")); \
        NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeOpMissing); \
        /* Placeholder operator occupies no characters of its own. */ \
        cur_node->len = 0; \
        ADD_OP_NODE(cur_node); \
        goto viml_pexpr_parse_process_token; \
      } \
    } while (0)

/// Record missing value: for things like "* 5"
///
/// Does nothing unless want_node is kENodeValue. Otherwise records the error,
/// stores a zero-length kExprNodeMissing node into *top_node_p and switches
/// want_node to kENodeOperator.
///
/// @param[in]  msg  Error message.
#define ADD_VALUE_IF_MISSING(msg) \
        do { \
          if (want_node == kENodeValue) { \
            ERROR_FROM_TOKEN_AND_MSG(cur_token, (msg)); \
            NEW_NODE_WITH_CUR_POS((*top_node_p), kExprNodeMissing); \
            (*top_node_p)->len = 0; \
            want_node = kENodeOperator; \
          } \
        } while (0)

/// Set AST error, unless an error was already recorded
///
/// Only the first error is kept: if ret_ast_err->msg is not NULL the function
/// does nothing.
///
/// @param[in]  pstate  Parser state, used to get error message argument.
/// @param[out]  ret_ast_err  Error structure to fill.
/// @param[in]  msg  Error message, assumed to be already translated and
///                  containing a single %token "%.*s".
/// @param[in]  start  Position at which error occurred.
+static inline void east_set_error(const ParserState *const pstate, +                                  ExprASTError *const ret_ast_err, +                                  const char *const msg, +                                  const ParserPosition start) +  FUNC_ATTR_NONNULL_ALL FUNC_ATTR_ALWAYS_INLINE +{ +  if (ret_ast_err->msg != NULL) { +    return; +  } +  const ParserLine pline = pstate->reader.lines.items[start.line]; +  ret_ast_err->msg = msg; +  ret_ast_err->arg_len = (int)(pline.size - start.col); +  ret_ast_err->arg = pline.data + start.col; +} + +/// Set error from the given token and given message +#define ERROR_FROM_TOKEN_AND_MSG(cur_token, msg) \ +    do { \ +      is_invalid = true; \ +      east_set_error(pstate, &ast.err, msg, cur_token.start); \ +    } while (0) + +/// Like #ERROR_FROM_TOKEN_AND_MSG, but gets position from a node +#define ERROR_FROM_NODE_AND_MSG(node, msg) \ +    do { \ +      is_invalid = true; \ +      east_set_error(pstate, &ast.err, msg, node->start); \ +    } while (0) + +/// Set error from the given kExprLexInvalid token +#define ERROR_FROM_TOKEN(cur_token) \ +    ERROR_FROM_TOKEN_AND_MSG(cur_token, cur_token.data.err.msg) + +/// Select figure brace type, altering highlighting as well if needed +/// +/// @param[out]  node  Node to modify type. +/// @param[in]  new_type  New type, one of ExprASTNodeType values without +///                       kExprNode prefix. +/// @param[in]  hl  Corresponding highlighting, passed as an argument to #HL. 
#define SELECT_FIGURE_BRACE_TYPE(node, new_type, hl) \
    do { \
      /* Evaluate the argument once. */ \
      ExprASTNode *const node_ = (node); \
      assert(node_->type == kExprNodeUnknownFigure \
             || node_->type == kExprNode##new_type); \
      node_->type = kExprNode##new_type; \
      if (pstate->colors) { \
        /* Re-color the already recorded opening brace highlight. */ \
        kv_A(*pstate->colors, node_->data.fig.opening_hl_idx).group = \
             HL(hl); \
      } \
    } while (0)

/// Add identifier which should constitute complex identifier node
///
/// This one is to be called only in case want_node is kENodeOperator.
///
/// Note: expands #OP_MISSING, which always leaves via goto, so code following
/// this macro only runs when the identifier was actually added.
///
/// @param  new_ident_node_code  Code used to create a new identifier node and
///                              update want_node and ast_stack, without
///                              a trailing semicolon.
/// @param  hl  Highlighting name to use, passed as an argument to #HL.
#define ADD_IDENT(new_ident_node_code, hl) \
    do { \
      assert(want_node == kENodeOperator); \
      /* Operator: may only be curly braces name, but only under certain */ \
      /* conditions. */ \
\
      /* First condition is that there is no space before a part of complex */ \
      /* identifier. */ \
      if (prev_token.type == kExprLexSpacing) { \
        OP_MISSING; \
      } \
      switch ((*top_node_p)->type) { \
        /* Second is that previous node is one of the identifiers: */ \
        /* complex, plain, curly braces. */ \
\
        /* TODO(ZyX-I): Extend syntax to allow ${expr}. This is needed to */ \
        /* handle environment variables like those bash uses for */ \
        /* `export -f`: their names consist not only of alphanumeric */ \
        /* characters. */ \
        case kExprNodeComplexIdentifier: \
        case kExprNodePlainIdentifier: \
        case kExprNodeCurlyBracesIdentifier: { \
          /* Wrap previous identifier into a zero-length ComplexIdentifier */ \
          /* node: previous identifier becomes its first child. */ \
          NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeComplexIdentifier); \
          cur_node->len = 0; \
          cur_node->children = *top_node_p; \
          *top_node_p = cur_node; \
          kvi_push(ast_stack, &cur_node->children->next); \
          ExprASTNode **const new_top_node_p = kv_last(ast_stack); \
          assert(*new_top_node_p == NULL); \
          /* Caller-supplied code is expected to leave the new identifier */ \
          /* in cur_node; it is stored as the second child below. */ \
          new_ident_node_code; \
          *new_top_node_p = cur_node; \
          HL_CUR_TOKEN(hl); \
          break; \
        } \
        default: { \
          OP_MISSING; \
          break; \
        } \
      } \
    } while (0)

/// Determine whether given parse type is an assignment
///
/// @param[in]  pt  Checked parse type.
///
/// @return true if parsing an assignment (kEPTAssignment or
///         kEPTSingleAssignment), false otherwise.
static inline bool pt_is_assignment(const ExprASTParseType pt)
  FUNC_ATTR_ALWAYS_INLINE FUNC_ATTR_CONST FUNC_ATTR_WARN_UNUSED_RESULT
{
  return (pt == kEPTAssignment || pt == kEPTSingleAssignment);
}

/// Structure used to define “string shifts” necessary to map string
/// highlighting to actual strings.
typedef struct {
  size_t start;  ///< Where special character starts in original string.
  size_t orig_len;  ///< Length of original string (e.g. 4 for "\x80").
  size_t act_len;  ///< Length of resulting character(s) (e.g. 1 for "\x80").
  bool escape_not_known;  ///< True if escape sequence in original is not known.
} StringShift;

/// Parse and highlight single- or double-quoted string
///
/// Function is supposed to detect and highlight regular expressions (but does
/// not do so yet).
///
/// @param[out]  pstate  Parser state which also contains a place where
///                      highlighting is saved.
/// @param[out]  node  Node where string parsing results are saved.
/// @param[in]  token  Token to highlight.
+/// @param[in]  ast_stack  Parser AST stack, used to detect whether current +///                        string is a regex. +/// @param[in]  is_invalid  Whether currently processed token is not valid. +static void parse_quoted_string(ParserState *const pstate, +                                ExprASTNode *const node, +                                const LexExprToken token, +                                const ExprASTStack ast_stack, +                                const bool is_invalid) +  FUNC_ATTR_NONNULL_ALL +{ +  const ParserLine pline = pstate->reader.lines.items[token.start.line]; +  const char *const s = pline.data + token.start.col; +  const char *const e = s + token.len - token.data.str.closed; +  const char *p = s + 1; +  const bool is_double = (token.type == kExprLexDoubleQuotedString); +  size_t size = token.len - token.data.str.closed - 1; +  kvec_withinit_t(StringShift, 16) shifts; +  kvi_init(shifts); +  if (!is_double) { +    viml_parser_highlight(pstate, token.start, 1, HL(SingleQuote)); +    while (p < e) { +      const char *const chunk_e = memchr(p, '\'', (size_t)(e - p)); +      if (chunk_e == NULL) { +        break; +      } +      size--; +      p = chunk_e + 2; +      if (pstate->colors) { +        kvi_push(shifts, ((StringShift) { +            .start = token.start.col + (size_t)(chunk_e - s), +            .orig_len = 2, +            .act_len = 1, +            .escape_not_known = false, +        })); +      } +    } +    node->data.str.size = size; +    if (size == 0) { +      node->data.str.value = NULL; +    } else { +      char *v_p; +      v_p = node->data.str.value = xmallocz(size); +      p = s + 1; +      while (p < e) { +        const char *const chunk_e = memchr(p, '\'', (size_t)(e - p)); +        if (chunk_e == NULL) { +          memcpy(v_p, p, (size_t)(e - p)); +          break; +        } +        memcpy(v_p, p, (size_t)(chunk_e - p)); +        v_p += (size_t)(chunk_e - p) + 1; +        v_p[-1] = '\''; +        p = chunk_e + 
2; +      } +    } +  } else { +    viml_parser_highlight(pstate, token.start, 1, HL(DoubleQuote)); +    for (p = s + 1; p < e; p++) { +      if (*p == '\\' && p + 1 < e) { +        p++; +        if (p + 1 == e) { +          size--; +          break; +        } +        switch (*p) { +          // A "\<x>" form occupies at least 4 characters, and produces up to +          // 6 characters: reserve space for 2 extra, but do not compute actual +          // length just now, it would be costy. +          case '<': { +            size += 2; +            break; +          } +          // Hexadecimal, always single byte, but at least three bytes each. +          case 'x': case 'X': { +            size--; +            if (ascii_isxdigit(p[1])) { +              size--; +              if (p + 2 < e && ascii_isxdigit(p[2])) { +                size--; +              } +            } +            break; +          } +          // Unicode +          // +          // \uF takes 1 byte which is 2 bytes less then escape sequence. +          // \uFF: 2 bytes, 2 bytes less. +          // \uFFF: 3 bytes, 2 bytes less. +          // \uFFFF: 3 bytes, 3 bytes less. +          // \UFFFFF: 4 bytes, 3 bytes less. +          // \UFFFFFF: 5 bytes, 3 bytes less. +          // \UFFFFFFF: 6 bytes, 3 bytes less. +          // \U7FFFFFFF: 6 bytes, 4 bytes less. +          case 'u': case 'U': { +            const char *const esc_start = p; +            size_t n = (*p == 'u' ? 4 : 8); +            int nr = 0; +            p++; +            while (p + 1 < e && n-- && ascii_isxdigit(p[1])) { +              p++; +              nr = (nr << 4) + hex2nr(*p); +            } +            // Escape length: (esc_start - 1) points to "\\", esc_start to "u" +            // or "U", p to the byte after last byte. So escape sequence +            // occupies p - (esc_start - 1), but it stands for a utf_char2len +            // bytes. 
+            size -= (size_t)((p - (esc_start - 1)) - utf_char2len(nr)); +            p--; +            break; +          } +          // Octal, always single byte, but at least two bytes each. +          case '0': case '1': case '2': case '3': case '4': case '5': case '6': +          case '7': { +            size--; +            p++; +            if (*p >= '0' && *p <= '7') { +              size--; +              p++; +              if (p < e && *p >= '0' && *p <= '7') { +                size--; +                p++; +              } +            } +            break; +          } +          default: { +            size--; +            break; +          } +        } +      } +    } +    if (size == 0) { +      node->data.str.value = NULL; +      node->data.str.size = 0; +    } else { +      char *v_p; +      v_p = node->data.str.value = xmalloc(size); +      p = s + 1; +      while (p < e) { +        const char *const chunk_e = memchr(p, '\\', (size_t)(e - p)); +        if (chunk_e == NULL) { +          memcpy(v_p, p, (size_t)(e - p)); +          v_p += e - p; +          break; +        } +        memcpy(v_p, p, (size_t)(chunk_e - p)); +        v_p += (size_t)(chunk_e - p); +        p = chunk_e + 1; +        if (p == e) { +          *v_p++ = '\\'; +          break; +        } +        bool is_unknown = false; +        const char *const v_p_start = v_p; +        switch (*p) { +#define SINGLE_CHAR_ESC(ch, real_ch) \ +          case ch: { \ +            *v_p++ = real_ch; \ +            p++; \ +            break; \ +          } +          SINGLE_CHAR_ESC('b', BS) +          SINGLE_CHAR_ESC('e', ESC) +          SINGLE_CHAR_ESC('f', FF) +          SINGLE_CHAR_ESC('n', NL) +          SINGLE_CHAR_ESC('r', CAR) +          SINGLE_CHAR_ESC('t', TAB) +          SINGLE_CHAR_ESC('"', '"') +          SINGLE_CHAR_ESC('\\', '\\') +#undef SINGLE_CHAR_ESC + +          // Hexadecimal or unicode. 
+          case 'X': case 'x': case 'u': case 'U': { +            if (p + 1 < e && ascii_isxdigit(p[1])) { +              size_t n; +              int nr; +              bool is_hex = (*p == 'x' || *p == 'X'); + +              if (is_hex) { +                n = 2; +              } else if (*p == 'u') { +                n = 4; +              } else { +                n = 8; +              } +              nr = 0; +              while (p + 1 < e && n-- && ascii_isxdigit(p[1])) { +                p++; +                nr = (nr << 4) + hex2nr(*p); +              } +              p++; +              if (is_hex) { +                *v_p++ = (char)nr; +              } else { +                v_p += utf_char2bytes(nr, (char_u *)v_p); +              } +            } else { +              is_unknown = true; +              *v_p++ = *p; +              p++; +            } +            break; +          } +          // Octal: "\1", "\12", "\123". +          case '0': case '1': case '2': case '3': case '4': case '5': case '6': +          case '7': { +            uint8_t ch = (uint8_t)(*p++ - '0'); +            if (p < e && *p >= '0' && *p <= '7') { +              ch = (uint8_t)((ch << 3) + *p++ - '0'); +              if (p < e && *p >= '0' && *p <= '7') { +                ch = (uint8_t)((ch << 3) + *p++ - '0'); +              } +            } +            *v_p++ = (char)ch; +            break; +          } +          // Special key, e.g.: "\<C-W>" +          case '<': { +            const size_t special_len = ( +                trans_special((const char_u **)&p, (size_t)(e - p), +                              (char_u *)v_p, true, true)); +            if (special_len != 0) { +              v_p += special_len; +            } else { +              is_unknown = true; +              mb_copy_char((const char_u **)&p, (char_u **)&v_p); +            } +            break; +          } +          default: { +            is_unknown = true; +            mb_copy_char((const char_u **)&p, 
(char_u **)&v_p); +            break; +          } +        } +        if (pstate->colors) { +          kvi_push(shifts, ((StringShift) { +              .start = token.start.col + (size_t)(chunk_e - s), +              .orig_len = (size_t)(p - chunk_e), +              .act_len = (size_t)(v_p - (char *)v_p_start), +              .escape_not_known = is_unknown, +          })); +        } +      } +      node->data.str.size = (size_t)(v_p - node->data.str.value); +    } +  } +  if (pstate->colors) { +    // TODO(ZyX-I): use ast_stack to determine and highlight regular expressions +    // TODO(ZyX-I): use ast_stack to determine and highlight printf format str +    // TODO(ZyX-I): use ast_stack to determine and highlight expression strings +    size_t next_col = token.start.col + 1; +    const char *const body_str = (is_double +                                  ? HL(DoubleQuotedBody) +                                  : HL(SingleQuotedBody)); +    const char *const esc_str = (is_double +                                 ? HL(DoubleQuotedEscape) +                                 : HL(SingleQuotedQuote)); +    const char *const ukn_esc_str = (is_double +                                     ? HL(DoubleQuotedUnknownEscape) +                                     : HL(SingleQuotedUnknownEscape)); +    for (size_t i = 0; i < kv_size(shifts); i++) { +      const StringShift cur_shift = kv_A(shifts, i); +      if (cur_shift.start > next_col) { +        viml_parser_highlight(pstate, recol_pos(token.start, next_col), +                              cur_shift.start - next_col, +                              body_str); +      } +      viml_parser_highlight(pstate, recol_pos(token.start, cur_shift.start), +                            cur_shift.orig_len, +                            (cur_shift.escape_not_known +                             ? 
ukn_esc_str +                             : esc_str)); +      next_col = cur_shift.start + cur_shift.orig_len; +    } +    if (next_col - token.start.col < token.len - token.data.str.closed) { +      viml_parser_highlight(pstate, recol_pos(token.start, next_col), +                            (token.start.col +                             + token.len +                             - token.data.str.closed +                             - next_col), +                            body_str); +    } +  } +  if (token.data.str.closed) { +    if (is_double) { +      viml_parser_highlight(pstate, shifted_pos(token.start, token.len - 1), +                            1, HL(DoubleQuote)); +    } else { +      viml_parser_highlight(pstate, shifted_pos(token.start, token.len - 1), +                            1, HL(SingleQuote)); +    } +  } +  kvi_destroy(shifts); +} + +/// Additional flags to pass to lexer depending on want_node +/// +/// Indexed by the wanted node kind: kENodeValue selects kELFlagIsNotCmp and +/// kENodeOperator selects kELFlagForbidScope.  viml_pexpr_parse() ORs the +/// selected entry into the flags it passes to viml_pexpr_next_token(). +static const int want_node_to_lexer_flags[] = { +  [kENodeValue] = kELFlagIsNotCmp, +  [kENodeOperator] = kELFlagForbidScope, +}; + +/// Number of characters to highlight as NumberPrefix depending on the base +/// +/// Indexed by the base itself.  Only indexes 2, 8, 10 and 16 are set +/// explicitly; elements at the remaining indexes up to 16 are implicitly +/// zero-initialized. +static const uint8_t base_to_prefix_length[] = { +  [2] = 2, +  [8] = 1, +  [10] = 0, +  [16] = 2, +}; + +/// Parse one VimL expression +/// +/// @param  pstate  Parser state. +/// @param[in]  flags  Additional flags, see ExprParserFlags +/// +/// @return Parsed AST. +ExprAST viml_pexpr_parse(ParserState *const pstate, const int flags) +  FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL +{ +  ExprAST ast = { +    .err = { +      .msg = NULL, +      .arg_len = 0, +      .arg = NULL, +    }, +    .root = NULL, +  }; +  // Expression stack contains current branch in AST tree: that is +  // - Stack item 0 contains root of the tree, i.e. &ast->root. +  // - Stack item i points to the previous stack items’ last child. +  // +  // When parser expects “value” node that is something like identifier or "[" +  // (list start) last stack item contains NULL. 
Otherwise last stack item is +  // supposed to contain last “finished” value: e.g. "1" or "+(1, 1)" (node +  // representing "1+1"). +  ExprASTStack ast_stack; +  kvi_init(ast_stack); +  kvi_push(ast_stack, &ast.root); +  ExprASTWantedNode want_node = kENodeValue; +  ExprASTParseTypeStack pt_stack; +  kvi_init(pt_stack); +  kvi_push(pt_stack, kEPTExpr); +  if (flags & kExprFlagsParseLet) { +    kvi_push(pt_stack, kEPTAssignment); +  } +  LexExprToken prev_token = { .type = kExprLexMissing }; +  bool highlighted_prev_spacing = false; +  // Lambda node, valid when parsing lambda arguments only. +  ExprASTNode *lambda_node = NULL; +  size_t asgn_level = 0; +  do { +    const bool is_concat_or_subscript = ( +        want_node == kENodeValue +        && kv_size(ast_stack) > 1 +        && (*kv_Z(ast_stack, 1))->type == kExprNodeConcatOrSubscript); +    const int lexer_additional_flags = ( +        kELFlagPeek +        | ((flags & kExprFlagsDisallowEOC) ? kELFlagForbidEOC : 0) +        | ((want_node == kENodeValue +            && (kv_size(ast_stack) == 1 +                || ((*kv_Z(ast_stack, 1))->type != kExprNodeConcat +                    && ((*kv_Z(ast_stack, 1))->type +                        != kExprNodeConcatOrSubscript)))) +           ? kELFlagAllowFloat +           : 0)); +    LexExprToken cur_token = viml_pexpr_next_token( +        pstate, want_node_to_lexer_flags[want_node] | lexer_additional_flags); +    if (cur_token.type == kExprLexEOC) { +      break; +    } +    LexExprTokenType tok_type = cur_token.type; +    const bool token_invalid = (tok_type == kExprLexInvalid); +    bool is_invalid = token_invalid; +viml_pexpr_parse_process_token: +    // May use different flags this time. 
+    cur_token = viml_pexpr_next_token( +        pstate, want_node_to_lexer_flags[want_node] | lexer_additional_flags); +    if (tok_type == kExprLexSpacing) { +      if (is_invalid) { +        HL_CUR_TOKEN(Spacing); +      } else { +        // Do not do anything: let regular spacing be highlighted as normal. +        // This also allows later to highlight spacing as invalid. +      } +      goto viml_pexpr_parse_cycle_end; +    } else if (is_invalid && prev_token.type == kExprLexSpacing +               && !highlighted_prev_spacing) { +      viml_parser_highlight(pstate, prev_token.start, prev_token.len, +                            HL(Spacing)); +      is_invalid = false; +      highlighted_prev_spacing = true; +    } +    const ParserLine pline = pstate->reader.lines.items[cur_token.start.line]; +    ExprASTNode **const top_node_p = kv_last(ast_stack); +    assert(kv_size(ast_stack) >= 1); +    ExprASTNode *cur_node = NULL; +#ifndef NDEBUG +    const bool want_value = (want_node == kENodeValue); +    assert(want_value == (*top_node_p == NULL)); +    assert(kv_A(ast_stack, 0) == &ast.root); +    // Check that stack item i + 1 points to stack items’ i *last* child. +    for (size_t i = 0; i + 1 < kv_size(ast_stack); i++) { +      const bool item_null = (want_value && i + 2 == kv_size(ast_stack)); +      assert((&(*kv_A(ast_stack, i))->children == kv_A(ast_stack, i + 1) +              && (item_null +                  ? (*kv_A(ast_stack, i))->children == NULL +                  : (*kv_A(ast_stack, i))->children->next == NULL)) +             || ((&(*kv_A(ast_stack, i))->children->next +                  == kv_A(ast_stack, i + 1)) +                 && (item_null +                     ? 
(*kv_A(ast_stack, i))->children->next == NULL +                     : (*kv_A(ast_stack, i))->children->next->next == NULL))); +    } +#endif +    // Note: in Vim whether expression "cond?d.a:2" is valid depends both on +    // "cond" and whether "d" is a dictionary: expression is valid if condition +    // is true and "d" is a dictionary (with "a" key or it will complain about +    // missing one, but this is not relevant); if any of the requirements is +    // broken then this thing is parsed as "d . a:2" yielding missing colon +    // error. This parser does not allow such ambiguity, especially because it +    // simply can’t: whether "d" is a dictionary is not known at the parsing +    // time. +    // +    // Here example will always contain a concat with "a:2" sucking colon, +    // making expression invalid both because there is no longer a spare colon +    // for ternary and because concatenating dictionary with anything is not +    // valid. There are more cases when this will make a difference though. +    const bool node_is_key = ( +        is_concat_or_subscript +        && (cur_token.type == kExprLexPlainIdentifier +            ? (!cur_token.data.var.autoload +               && cur_token.data.var.scope == kExprVarScopeMissing) +            : (cur_token.type == kExprLexNumber)) +        && prev_token.type != kExprLexSpacing); +    if (is_concat_or_subscript && !node_is_key) { +      // Note: in Vim "d. a" (this is the reason behind `prev_token.type != +      // kExprLexSpacing` part of the condition) as well as any other "d.{expr}" +      // where "{expr}" does not look like a key is invalid whenever "d" happens +      // to be a dictionary. 
Since parser has no idea whether preceding +      // expression is actually a dictionary it can’t outright reject anything, +      // so it turns kExprNodeConcatOrSubscript into kExprNodeConcat instead, +      // which will yield different errors then Vim does in a number of +      // circumstances, and in any case runtime and not parse time errors. +      (*kv_Z(ast_stack, 1))->type = kExprNodeConcat; +    } +    // Pop some stack pt_stack items in case of misplaced nodes. +    const bool is_single_assignment = kv_last(pt_stack) == kEPTSingleAssignment; +    switch (kv_last(pt_stack)) { +      case kEPTExpr: { +        break; +      } +      case kEPTLambdaArguments: { +        if ((want_node == kENodeOperator +             && tok_type != kExprLexComma +             && tok_type != kExprLexArrow) +            || (want_node == kENodeValue +                && !(cur_token.type == kExprLexPlainIdentifier +                     && cur_token.data.var.scope == kExprVarScopeMissing +                     && !cur_token.data.var.autoload) +                && tok_type != kExprLexArrow)) { +          lambda_node->data.fig.type_guesses.allow_lambda = false; +          if (lambda_node->children != NULL +              && lambda_node->children->type == kExprNodeComma) { +            // If lambda has comma child this means that parser has already seen +            // at least "{arg1,", so node cannot possibly be anything, but +            // lambda. + +            // Vim may give E121 or E720 in this case, but it does not look +            // right to have either because both are results of reevaluation +            // possibly-lambda node as a dictionary and here this is not going +            // to happen. 
+            ERROR_FROM_TOKEN_AND_MSG( +                cur_token, +                _("E15: Expected lambda arguments list or arrow: %.*s")); +          } else { +            // Else it may appear that possibly-lambda node is actually +            // a dictionary or curly-braces-name identifier. +            lambda_node = NULL; +            kv_drop(pt_stack, 1); +          } +        } +        break; +      } +      case kEPTSingleAssignment: +      case kEPTAssignment: { +        if (want_node == kENodeValue +            && tok_type != kExprLexBracket +            && tok_type != kExprLexPlainIdentifier +            && (tok_type != kExprLexFigureBrace || cur_token.data.brc.closing) +            && !(node_is_key && tok_type == kExprLexNumber) +            && tok_type != kExprLexEnv +            && tok_type != kExprLexOption +            && tok_type != kExprLexRegister) { +          ERROR_FROM_TOKEN_AND_MSG( +              cur_token, +              _("E15: Expected value part of assignment lvalue: %.*s")); +          kv_drop(pt_stack, 1); +        } else if (want_node == kENodeOperator +                   && tok_type != kExprLexBracket +                   && (tok_type != kExprLexFigureBrace +                       || cur_token.data.brc.closing) +                   && tok_type != kExprLexDot +                   && (tok_type != kExprLexComma || !is_single_assignment) +                   && tok_type != kExprLexAssignment +                   // Curly brace identifiers: will contain plain identifier or +                   // another curly brace in position where operator is wanted. 
+                   && !((tok_type == kExprLexPlainIdentifier +                         || (tok_type == kExprLexFigureBrace +                             && !cur_token.data.brc.closing)) +                        && prev_token.type != kExprLexSpacing)) { +          if (flags & kExprFlagsMulti && MAY_HAVE_NEXT_EXPR) { +            goto viml_pexpr_parse_end; +          } +          ERROR_FROM_TOKEN_AND_MSG( +              cur_token, +              _("E15: Expected assignment operator or subscript: %.*s")); +          kv_drop(pt_stack, 1); +        } +        assert(kv_size(pt_stack)); +        break; +      } +    } +    assert(kv_size(pt_stack)); +    const ExprASTParseType cur_pt = kv_last(pt_stack); +    assert(lambda_node == NULL || cur_pt == kEPTLambdaArguments); +    switch (tok_type) { +      case kExprLexMissing: +      case kExprLexSpacing: +      case kExprLexEOC: { +        assert(false); +      } +      case kExprLexInvalid: { +        ERROR_FROM_TOKEN(cur_token); +        tok_type = cur_token.data.err.type; +        goto viml_pexpr_parse_process_token; +      } +      case kExprLexRegister: { +        if (want_node == kENodeOperator) { +          // Register in operator position: e.g. @a @a +          OP_MISSING; +        } +        NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeRegister); +        cur_node->data.reg.name = cur_token.data.reg.name; +        *top_node_p = cur_node; +        want_node = kENodeOperator; +        HL_CUR_TOKEN(Register); +        break; +      } +#define SIMPLE_UB_OP(op) \ +      case kExprLex##op: { \ +        if (want_node == kENodeValue) { \ +          /* Value level: assume unary operator. 
*/ \ +          NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeUnary##op); \ +          *top_node_p = cur_node; \ +          kvi_push(ast_stack, &cur_node->children); \ +          HL_CUR_TOKEN(Unary##op); \ +        } else { \ +          NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeBinary##op); \ +          ADD_OP_NODE(cur_node); \ +          HL_CUR_TOKEN(Binary##op); \ +        } \ +        want_node = kENodeValue; \ +        break; \ +      } +      SIMPLE_UB_OP(Plus) +      SIMPLE_UB_OP(Minus) +#undef SIMPLE_UB_OP +#define SIMPLE_B_OP(op, msg) \ +      case kExprLex##op: { \ +        ADD_VALUE_IF_MISSING(_("E15: Unexpected " msg ": %.*s")); \ +        NEW_NODE_WITH_CUR_POS(cur_node, kExprNode##op); \ +        HL_CUR_TOKEN(op); \ +        ADD_OP_NODE(cur_node); \ +        break; \ +      } +      SIMPLE_B_OP(Or, "or operator") +      SIMPLE_B_OP(And, "and operator") +#undef SIMPLE_B_OP +      case kExprLexMultiplication: { +        ADD_VALUE_IF_MISSING( +            _("E15: Unexpected multiplication-like operator: %.*s")); +        switch (cur_token.data.mul.type) { +#define MUL_OP(lex_op_tail, node_op_tail) \ +          case kExprLexMul##lex_op_tail: { \ +            NEW_NODE_WITH_CUR_POS(cur_node, kExprNode##node_op_tail); \ +            HL_CUR_TOKEN(node_op_tail); \ +            break; \ +          } +          MUL_OP(Mul, Multiplication) +          MUL_OP(Div, Division) +          MUL_OP(Mod, Mod) +#undef MUL_OP +        } +        ADD_OP_NODE(cur_node); +        break; +      } +      case kExprLexOption: { +        if (want_node == kENodeOperator) { +          OP_MISSING; +        } +        NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeOption); +        if (cur_token.type == kExprLexInvalid) { +          assert(cur_token.len == 1 +                 || (cur_token.len == 3 +                     && pline.data[cur_token.start.col + 2] == ':')); +          cur_node->data.opt.ident = ( +              pline.data + cur_token.start.col + cur_token.len); +          
cur_node->data.opt.ident_len = 0; +          cur_node->data.opt.scope = ( +              cur_token.len == 3 +              ? (ExprOptScope)pline.data[cur_token.start.col + 1] +              : kExprOptScopeUnspecified); +        } else { +          cur_node->data.opt.ident = cur_token.data.opt.name; +          cur_node->data.opt.ident_len = cur_token.data.opt.len; +          cur_node->data.opt.scope = cur_token.data.opt.scope; +        } +        *top_node_p = cur_node; +        want_node = kENodeOperator; +        viml_parser_highlight(pstate, cur_token.start, 1, HL(OptionSigil)); +        const size_t scope_shift = ( +            cur_token.data.opt.scope == kExprOptScopeUnspecified ? 0 : 2); +        if (scope_shift) { +          viml_parser_highlight(pstate, shifted_pos(cur_token.start, 1), 1, +                                HL(OptionScope)); +          viml_parser_highlight(pstate, shifted_pos(cur_token.start, 2), 1, +                                HL(OptionScopeDelimiter)); +        } +        viml_parser_highlight( +            pstate, shifted_pos(cur_token.start, scope_shift + 1), +            cur_token.len - (scope_shift + 1), HL(OptionName)); +        break; +      } +      case kExprLexEnv: { +        if (want_node == kENodeOperator) { +          OP_MISSING; +        } +        NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeEnvironment); +        cur_node->data.env.ident = pline.data + cur_token.start.col + 1; +        cur_node->data.env.ident_len = cur_token.len - 1; +        if (cur_node->data.env.ident_len == 0) { +          ERROR_FROM_TOKEN_AND_MSG(cur_token, +                                   _("E15: Environment variable name missing")); +        } +        *top_node_p = cur_node; +        want_node = kENodeOperator; +        viml_parser_highlight(pstate, cur_token.start, 1, HL(EnvironmentSigil)); +        viml_parser_highlight(pstate, shifted_pos(cur_token.start, 1), +                              cur_token.len - 1, HL(EnvironmentName)); +        break; 
+      } +      case kExprLexNot: { +        if (want_node == kENodeOperator) { +          OP_MISSING; +        } +        NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeNot); +        *top_node_p = cur_node; +        kvi_push(ast_stack, &cur_node->children); +        HL_CUR_TOKEN(Not); +        break; +      } +      case kExprLexComparison: { +        ADD_VALUE_IF_MISSING( +            _("E15: Expected value, got comparison operator: %.*s")); +        NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeComparison); +        if (cur_token.type == kExprLexInvalid) { +          cur_node->data.cmp.ccs = kCCStrategyUseOption; +          cur_node->data.cmp.type = kExprCmpEqual; +          cur_node->data.cmp.inv = false; +        } else { +          cur_node->data.cmp.ccs = cur_token.data.cmp.ccs; +          cur_node->data.cmp.type = cur_token.data.cmp.type; +          cur_node->data.cmp.inv = cur_token.data.cmp.inv; +        } +        ADD_OP_NODE(cur_node); +        if (cur_token.data.cmp.ccs != kCCStrategyUseOption) { +          viml_parser_highlight(pstate, cur_token.start, cur_token.len - 1, +                                HL(Comparison)); +          viml_parser_highlight( +              pstate, shifted_pos(cur_token.start, cur_token.len - 1), 1, +              HL(ComparisonModifier)); +        } else { +          HL_CUR_TOKEN(Comparison); +        } +        want_node = kENodeValue; +        break; +      } +      case kExprLexComma: { +        assert(!(want_node == kENodeValue && cur_pt == kEPTLambdaArguments)); +        if (want_node == kENodeValue) { +          // Value level: comma appearing here is not valid. +          // Note: in Vim string(,x) will give E116, this is not the case here. 
+          ERROR_FROM_TOKEN_AND_MSG( +              cur_token, _("E15: Expected value, got comma: %.*s")); +          NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeMissing); +          cur_node->len = 0; +          *top_node_p = cur_node; +          want_node = kENodeOperator; +        } +        if (cur_pt == kEPTLambdaArguments) { +          assert(lambda_node != NULL); +          assert(lambda_node->data.fig.type_guesses.allow_lambda); +          SELECT_FIGURE_BRACE_TYPE(lambda_node, Lambda, Lambda); +        } +        if (kv_size(ast_stack) < 2) { +          goto viml_pexpr_parse_invalid_comma; +        } +        for (size_t i = 1; i < kv_size(ast_stack); i++) { +          ExprASTNode *const *const eastnode_p = +              (ExprASTNode *const *)kv_Z(ast_stack, i); +          const ExprASTNodeType eastnode_type = (*eastnode_p)->type; +          const ExprOpLvl eastnode_lvl = node_lvl(**eastnode_p); +          if (eastnode_type == kExprNodeLambda) { +            assert(cur_pt == kEPTLambdaArguments +                   && want_node == kENodeOperator); +            break; +          } else if (eastnode_type == kExprNodeDictLiteral +                     || eastnode_type == kExprNodeListLiteral +                     || eastnode_type == kExprNodeCall) { +            break; +          } else if (eastnode_type == kExprNodeComma +                     || eastnode_type == kExprNodeColon +                     || eastnode_lvl > kEOpLvlComma) { +            // Do nothing +          } else { +viml_pexpr_parse_invalid_comma: +            ERROR_FROM_TOKEN_AND_MSG( +                cur_token, +                _("E15: Comma outside of call, lambda or literal: %.*s")); +            break; +          } +          if (i == kv_size(ast_stack) - 1) { +            goto viml_pexpr_parse_invalid_comma; +          } +        } +        NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeComma); +        ADD_OP_NODE(cur_node); +        HL_CUR_TOKEN(Comma); +        break; +      } +#define 
EXP_VAL_COLON "E15: Expected value, got colon: %.*s" +      case kExprLexColon: { +        bool is_ternary = false; +        if (kv_size(ast_stack) < 2) { +          goto viml_pexpr_parse_invalid_colon; +        } +        bool can_be_ternary = true; +        bool is_subscript = false; +        for (size_t i = 1; i < kv_size(ast_stack); i++) { +          ExprASTNode *const *const eastnode_p = +              (ExprASTNode *const *)kv_Z(ast_stack, i); +          const ExprASTNodeType eastnode_type = (*eastnode_p)->type; +          const ExprOpLvl eastnode_lvl = node_lvl(**eastnode_p); +          STATIC_ASSERT(kEOpLvlTernary > kEOpLvlComma, +                        "Unexpected operator priorities"); +          if (can_be_ternary && eastnode_type == kExprNodeTernaryValue +              && !(*eastnode_p)->data.ter.got_colon) { +            kv_drop(ast_stack, i); +            (*eastnode_p)->start = cur_token.start; +            (*eastnode_p)->len = cur_token.len; +            if (prev_token.type == kExprLexSpacing) { +              (*eastnode_p)->start = prev_token.start; +              (*eastnode_p)->len += prev_token.len; +            } +            is_ternary = true; +            (*eastnode_p)->data.ter.got_colon = true; +            ADD_VALUE_IF_MISSING(_(EXP_VAL_COLON)); +            assert((*eastnode_p)->children != NULL); +            assert((*eastnode_p)->children->next == NULL); +            kvi_push(ast_stack, &(*eastnode_p)->children->next); +            break; +          } else if (eastnode_type == kExprNodeUnknownFigure) { +            SELECT_FIGURE_BRACE_TYPE(*eastnode_p, DictLiteral, Dict); +            break; +          } else if (eastnode_type == kExprNodeDictLiteral) { +            break; +          } else if (eastnode_type == kExprNodeSubscript) { +            is_subscript = true; +            can_be_ternary = false; +            assert(!is_ternary); +            break; +          } else if (eastnode_type == kExprNodeColon) { +            goto 
viml_pexpr_parse_invalid_colon; +          } else if (eastnode_lvl >= kEOpLvlTernaryValue) { +            // Do nothing +          } else if (eastnode_lvl >= kEOpLvlComma) { +            can_be_ternary = false; +          } else { +            goto viml_pexpr_parse_invalid_colon; +          } +          if (i == kv_size(ast_stack) - 1) { +            goto viml_pexpr_parse_invalid_colon; +          } +        } +        if (is_subscript) { +          assert(kv_size(ast_stack) > 1); +          // Colon immediately following subscript start: it is empty subscript +          // part like a[:2]. +          if (want_node == kENodeValue +              && (*kv_Z(ast_stack, 1))->type == kExprNodeSubscript) { +            NEW_NODE_WITH_CUR_POS(*top_node_p, kExprNodeMissing); +            (*top_node_p)->len = 0; +            want_node = kENodeOperator; +          } else { +            ADD_VALUE_IF_MISSING(_(EXP_VAL_COLON)); +          } +          NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeColon); +          ADD_OP_NODE(cur_node); +          HL_CUR_TOKEN(SubscriptColon); +        } else { +          goto viml_pexpr_parse_valid_colon; +viml_pexpr_parse_invalid_colon: +          ERROR_FROM_TOKEN_AND_MSG( +              cur_token, +              _("E15: Colon outside of dictionary or ternary operator: %.*s")); +viml_pexpr_parse_valid_colon: +          ADD_VALUE_IF_MISSING(_(EXP_VAL_COLON)); +          if (is_ternary) { +            HL_CUR_TOKEN(TernaryColon); +          } else { +            NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeColon); +            ADD_OP_NODE(cur_node); +            HL_CUR_TOKEN(Colon); +          } +        } +        want_node = kENodeValue; +        break; +      } +#undef EXP_VAL_COLON +      case kExprLexBracket: { +        if (cur_token.data.brc.closing) { +          ExprASTNode **new_top_node_p = NULL; +          // Always drop the topmost value: +          // +          // 1. 
When want_node != kENodeValue topmost item on stack is +          //    a *finished* left operand, which may as well be "{@a}" which +          //    needs not be finished again. +          // 2. Otherwise it is pointing to NULL what nobody wants. +          kv_drop(ast_stack, 1); +          if (!kv_size(ast_stack)) { +            NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeListLiteral); +            cur_node->len = 0; +            if (want_node != kENodeValue) { +              cur_node->children = *top_node_p; +            } +            *top_node_p = cur_node; +            goto viml_pexpr_parse_bracket_closing_error; +          } +          if (want_node == kENodeValue) { +            // It is OK to want value if +            // +            // 1. It is empty list literal, in which case top node will be +            //    ListLiteral. +            // 2. It is list literal with trailing comma, in which case top node +            //    will be that comma. +            // 3. It is subscript with colon, but without one of the values: +            //    e.g. "a[:]", "a[1:]", top node will be colon in this case. 
+            if ((*kv_last(ast_stack))->type != kExprNodeListLiteral +                && (*kv_last(ast_stack))->type != kExprNodeComma +                && (*kv_last(ast_stack))->type != kExprNodeColon) { +              ERROR_FROM_TOKEN_AND_MSG( +                  cur_token, +                  _("E15: Expected value, got closing bracket: %.*s")); +            } +          } +          do { +            new_top_node_p = kv_pop(ast_stack); +          } while (kv_size(ast_stack) +                   && (new_top_node_p == NULL +                       || ((*new_top_node_p)->type != kExprNodeListLiteral +                           && (*new_top_node_p)->type != kExprNodeSubscript))); +          ExprASTNode *new_top_node = *new_top_node_p; +          switch (new_top_node->type) { +            case kExprNodeListLiteral: { +              if (pt_is_assignment(cur_pt) && new_top_node->children == NULL) { +                ERROR_FROM_TOKEN_AND_MSG( +                    cur_token, _("E475: Unable to assign to empty list: %.*s")); +              } +              HL_CUR_TOKEN(List); +              break; +            } +            case kExprNodeSubscript: { +              HL_CUR_TOKEN(SubscriptBracket); +              break; +            } +            default: { +viml_pexpr_parse_bracket_closing_error: +              assert(!kv_size(ast_stack)); +              ERROR_FROM_TOKEN_AND_MSG( +                  cur_token, _("E15: Unexpected closing figure brace: %.*s")); +              HL_CUR_TOKEN(List); +              break; +            } +          } +          kvi_push(ast_stack, new_top_node_p); +          want_node = kENodeOperator; +          if (kv_size(ast_stack) <= asgn_level) { +            assert(kv_size(ast_stack) == asgn_level); +            asgn_level = 0; +            if (cur_pt == kEPTAssignment) { +              assert(ast.err.msg); +            } else if (cur_pt == kEPTExpr +                       && kv_size(pt_stack) > 1 +                       && 
pt_is_assignment(kv_Z(pt_stack, 1))) { +              kv_drop(pt_stack, 1); +            } +          } +          if (cur_pt == kEPTSingleAssignment && kv_size(ast_stack) == 1) { +            kv_drop(pt_stack, 1); +          } +        } else { +          if (want_node == kENodeValue) { +            // Value means list literal or list assignment. +            NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeListLiteral); +            *top_node_p = cur_node; +            kvi_push(ast_stack, &cur_node->children); +            want_node = kENodeValue; +            if (cur_pt == kEPTAssignment) { +              // Additional assignment parse type allows to easily forbid nested +              // lists. +              kvi_push(pt_stack, kEPTSingleAssignment); +            } else if (cur_pt == kEPTSingleAssignment) { +              ERROR_FROM_TOKEN_AND_MSG( +                  cur_token, +                  _("E475: Nested lists not allowed when assigning: %.*s")); +            } +            HL_CUR_TOKEN(List); +          } else { +            // Operator means subscript, also in assignment. But in assignment +            // subscript may be pretty much any expression, so need to push +            // kEPTExpr. +            if (prev_token.type == kExprLexSpacing) { +              OP_MISSING; +            } +            NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeSubscript); +            ADD_OP_NODE(cur_node); +            HL_CUR_TOKEN(SubscriptBracket); +            if (pt_is_assignment(cur_pt)) { +              assert(want_node == kENodeValue);  // Subtract 1 for NULL at top. +              asgn_level = kv_size(ast_stack) - 1; +              kvi_push(pt_stack, kEPTExpr); +            } +          } +        } +        break; +      } +      case kExprLexFigureBrace: { +        if (cur_token.data.brc.closing) { +          ExprASTNode **new_top_node_p = NULL; +          // Always drop the topmost value: +          // +          // 1. 
When want_node != kENodeValue topmost item on stack is +          //    a *finished* left operand, which may as well be "{@a}" which +          //    needs not be finished again. +          // 2. Otherwise it is pointing to NULL what nobody wants. +          kv_drop(ast_stack, 1); +          if (!kv_size(ast_stack)) { +            NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeUnknownFigure); +            cur_node->data.fig.type_guesses.allow_lambda = false; +            cur_node->data.fig.type_guesses.allow_dict = false; +            cur_node->data.fig.type_guesses.allow_ident = false; +            cur_node->len = 0; +            if (want_node != kENodeValue) { +              cur_node->children = *top_node_p; +            } +            *top_node_p = cur_node; +            new_top_node_p = top_node_p; +            goto viml_pexpr_parse_figure_brace_closing_error; +          } +          if (want_node == kENodeValue) { +            if ((*kv_last(ast_stack))->type != kExprNodeUnknownFigure +                && (*kv_last(ast_stack))->type != kExprNodeComma) { +              // kv_last being UnknownFigure may occur for empty dictionary +              // literal, while Comma is expected in case of non-empty one. 
+              ERROR_FROM_TOKEN_AND_MSG( +                  cur_token, +                  _("E15: Expected value, got closing figure brace: %.*s")); +            } +          } +          do { +            new_top_node_p = kv_pop(ast_stack); +          } while (kv_size(ast_stack) +                   && (new_top_node_p == NULL +                       || ((*new_top_node_p)->type != kExprNodeUnknownFigure +                           && (*new_top_node_p)->type != kExprNodeDictLiteral +                           && ((*new_top_node_p)->type +                               != kExprNodeCurlyBracesIdentifier) +                           && (*new_top_node_p)->type != kExprNodeLambda))); +          ExprASTNode *new_top_node = *new_top_node_p; +          switch (new_top_node->type) { +            case kExprNodeUnknownFigure: { +              if (new_top_node->children == NULL) { +                // No children of curly braces node indicates empty dictionary. +                assert(want_node == kENodeValue); +                assert(new_top_node->data.fig.type_guesses.allow_dict); +                SELECT_FIGURE_BRACE_TYPE(new_top_node, DictLiteral, Dict); +                HL_CUR_TOKEN(Dict); +              } else if (new_top_node->data.fig.type_guesses.allow_ident) { +                SELECT_FIGURE_BRACE_TYPE(new_top_node, CurlyBracesIdentifier, +                                         Curly); +                HL_CUR_TOKEN(Curly); +              } else { +                // If by this time type of the node has not already been +                // guessed, but it definitely is not a curly braces name then +                // it is invalid for sure. +                ERROR_FROM_NODE_AND_MSG( +                    new_top_node, +                    _("E15: Don't know what figure brace means: %.*s")); +                if (pstate->colors) { +                  // Will reset to NvimInvalidFigureBrace. 
+                  kv_A(*pstate->colors, +                       new_top_node->data.fig.opening_hl_idx).group = ( +                           HL(FigureBrace)); +                } +                HL_CUR_TOKEN(FigureBrace); +              } +              break; +            } +            case kExprNodeDictLiteral: { +              HL_CUR_TOKEN(Dict); +              break; +            } +            case kExprNodeCurlyBracesIdentifier: { +              HL_CUR_TOKEN(Curly); +              break; +            } +            case kExprNodeLambda: { +              HL_CUR_TOKEN(Lambda); +              break; +            } +            default: { +viml_pexpr_parse_figure_brace_closing_error: +              assert(!kv_size(ast_stack)); +              ERROR_FROM_TOKEN_AND_MSG( +                  cur_token, _("E15: Unexpected closing figure brace: %.*s")); +              HL_CUR_TOKEN(FigureBrace); +              break; +            } +          } +          kvi_push(ast_stack, new_top_node_p); +          want_node = kENodeOperator; +          if (kv_size(ast_stack) <= asgn_level) { +            assert(kv_size(ast_stack) == asgn_level); +            if (cur_pt == kEPTExpr +                && kv_size(pt_stack) > 1 +                && pt_is_assignment(kv_Z(pt_stack, 1))) { +              kv_drop(pt_stack, 1); +              asgn_level = 0; +            } +          } +        } else { +          if (want_node == kENodeValue) { +            HL_CUR_TOKEN(FigureBrace); +            // Value: may be any of lambda, dictionary literal and curly braces +            // name. + +            // Though if we are in an assignment this may only be a curly braces +            // name. 
+            if (pt_is_assignment(cur_pt)) { +              NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeCurlyBracesIdentifier); +              cur_node->data.fig.type_guesses.allow_lambda = false; +              cur_node->data.fig.type_guesses.allow_dict = false; +              cur_node->data.fig.type_guesses.allow_ident = true; +              kvi_push(pt_stack, kEPTExpr); +            } else { +              NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeUnknownFigure); +              cur_node->data.fig.type_guesses.allow_lambda = true; +              cur_node->data.fig.type_guesses.allow_dict = true; +              cur_node->data.fig.type_guesses.allow_ident = true; +            } +            if (pstate->colors) { +              cur_node->data.fig.opening_hl_idx = kv_size(*pstate->colors) - 1; +            } +            *top_node_p = cur_node; +            kvi_push(ast_stack, &cur_node->children); +            kvi_push(pt_stack, kEPTLambdaArguments); +            lambda_node = cur_node; +          } else { +            ADD_IDENT( +                do { +                  NEW_NODE_WITH_CUR_POS(cur_node, +                                        kExprNodeCurlyBracesIdentifier); +                  cur_node->data.fig.opening_hl_idx = kv_size(*pstate->colors); +                  cur_node->data.fig.type_guesses.allow_lambda = false; +                  cur_node->data.fig.type_guesses.allow_dict = false; +                  cur_node->data.fig.type_guesses.allow_ident = true; +                  kvi_push(ast_stack, &cur_node->children); +                  if (pt_is_assignment(cur_pt)) { +                    kvi_push(pt_stack, kEPTExpr); +                  } +                  want_node = kENodeValue; +                } while (0), +                Curly); +          } +          if (pt_is_assignment(cur_pt) +              && !pt_is_assignment(kv_last(pt_stack))) { +            assert(want_node == kENodeValue);  // Subtract 1 for NULL at top. 
+            asgn_level = kv_size(ast_stack) - 1; +          } +        } +        break; +      } +      case kExprLexArrow: { +        if (cur_pt == kEPTLambdaArguments) { +          kv_drop(pt_stack, 1); +          assert(kv_size(pt_stack)); +          if (want_node == kENodeValue) { +            // Wanting value means trailing comma and NULL at the top of the +            // stack. +            kv_drop(ast_stack, 1); +          } +          assert(kv_size(ast_stack) >= 1); +          while ((*kv_last(ast_stack))->type != kExprNodeLambda +                 && (*kv_last(ast_stack))->type != kExprNodeUnknownFigure) { +            kv_drop(ast_stack, 1); +          } +          assert((*kv_last(ast_stack)) == lambda_node); +          SELECT_FIGURE_BRACE_TYPE(lambda_node, Lambda, Lambda); +          NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeArrow); +          if (lambda_node->children == NULL) { +            assert(want_node == kENodeValue); +            lambda_node->children = cur_node; +            kvi_push(ast_stack, &lambda_node->children); +          } else { +            assert(lambda_node->children->next == NULL); +            lambda_node->children->next = cur_node; +            kvi_push(ast_stack, &lambda_node->children->next); +          } +          kvi_push(ast_stack, &cur_node->children); +          lambda_node = NULL; +        } else { +          // Only first branch is valid. +          ADD_VALUE_IF_MISSING(_("E15: Unexpected arrow: %.*s")); +          ERROR_FROM_TOKEN_AND_MSG( +              cur_token, _("E15: Arrow outside of lambda: %.*s")); +          NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeArrow); +          ADD_OP_NODE(cur_node); +        } +        want_node = kENodeValue; +        HL_CUR_TOKEN(Arrow); +        break; +      } +      case kExprLexPlainIdentifier: { +        const ExprVarScope scope = (cur_token.type == kExprLexInvalid +                                    ? 
kExprVarScopeMissing +                                    : cur_token.data.var.scope); +        if (want_node == kENodeValue) { +          want_node = kENodeOperator; +          NEW_NODE_WITH_CUR_POS(cur_node, +                                (node_is_key +                                 ? kExprNodePlainKey +                                 : kExprNodePlainIdentifier)); +          cur_node->data.var.scope = scope; +          const size_t scope_shift = (scope == kExprVarScopeMissing ? 0 : 2); +          cur_node->data.var.ident = (pline.data + cur_token.start.col +                                      + scope_shift); +          cur_node->data.var.ident_len = cur_token.len - scope_shift; +          *top_node_p = cur_node; +          if (scope_shift) { +            assert(!node_is_key); +            viml_parser_highlight(pstate, cur_token.start, 1, +                                  HL(IdentifierScope)); +            viml_parser_highlight(pstate, shifted_pos(cur_token.start, 1), 1, +                                  HL(IdentifierScopeDelimiter)); +          } +          viml_parser_highlight(pstate, shifted_pos(cur_token.start, +                                                    scope_shift), +                                cur_token.len - scope_shift, +                                (node_is_key +                                 ? 
HL(IdentifierKey) +                                 : HL(IdentifierName))); +        } else { +          if (scope == kExprVarScopeMissing) { +            ADD_IDENT( +                do { +                  NEW_NODE_WITH_CUR_POS(cur_node, kExprNodePlainIdentifier); +                  cur_node->data.var.scope = scope; +                  cur_node->data.var.ident = pline.data + cur_token.start.col; +                  cur_node->data.var.ident_len = cur_token.len; +                  want_node = kENodeOperator; +                } while (0), +                IdentifierName); +          } else { +            OP_MISSING; +          } +        } +        break; +      } +      case kExprLexNumber: { +        if (want_node != kENodeValue) { +          OP_MISSING; +        } +        if (node_is_key) { +          NEW_NODE_WITH_CUR_POS(cur_node, kExprNodePlainKey); +          cur_node->data.var.ident = pline.data + cur_token.start.col; +          cur_node->data.var.ident_len = cur_token.len; +          HL_CUR_TOKEN(IdentifierKey); +        } else if (cur_token.data.num.is_float) { +          NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeFloat); +          cur_node->data.flt.value = cur_token.data.num.val.floating; +          HL_CUR_TOKEN(Float); +        } else { +          NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeInteger); +          cur_node->data.num.value = cur_token.data.num.val.integer; +          const uint8_t prefix_length = base_to_prefix_length[ +              cur_token.data.num.base]; +          viml_parser_highlight(pstate, cur_token.start, prefix_length, +                                HL(NumberPrefix)); +          viml_parser_highlight( +              pstate, shifted_pos(cur_token.start, prefix_length), +              cur_token.len - prefix_length, HL(Number)); +        } +        want_node = kENodeOperator; +        *top_node_p = cur_node; +        break; +      } +      case kExprLexDot: { +        ADD_VALUE_IF_MISSING(_("E15: Unexpected dot: %.*s")); +        if 
(prev_token.type == kExprLexSpacing) { +          if (cur_pt == kEPTAssignment) { +            ERROR_FROM_TOKEN_AND_MSG( +                cur_token, _("E15: Cannot concatenate in assignments: %.*s")); +          } +          NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeConcat); +          HL_CUR_TOKEN(Concat); +        } else { +          NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeConcatOrSubscript); +          HL_CUR_TOKEN(ConcatOrSubscript); +        } +        ADD_OP_NODE(cur_node); +        break; +      } +      case kExprLexParenthesis: { +        if (cur_token.data.brc.closing) { +          if (want_node == kENodeValue) { +            if (kv_size(ast_stack) > 1) { +              const ExprASTNode *const prev_top_node = *kv_Z(ast_stack, 1); +              if (prev_top_node->type == kExprNodeCall) { +                // Function call without arguments, this is not an error. +                // But further code does not expect NULL nodes. +                kv_drop(ast_stack, 1); +                goto viml_pexpr_parse_no_paren_closing_error; +              } +            } +            ERROR_FROM_TOKEN_AND_MSG( +                cur_token, _("E15: Expected value, got parenthesis: %.*s")); +            NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeMissing); +            cur_node->len = 0; +            *top_node_p = cur_node; +          } else { +            // Always drop the topmost value: when want_node != kENodeValue +            // topmost item on stack is a *finished* left operand, which may as +            // well be "(@a)" which needs not be finished again. 
+            kv_drop(ast_stack, 1); +          } +viml_pexpr_parse_no_paren_closing_error: {} +          ExprASTNode **new_top_node_p = NULL; +          while (kv_size(ast_stack) +                 && (new_top_node_p == NULL +                     || ((*new_top_node_p)->type != kExprNodeNested +                         && (*new_top_node_p)->type != kExprNodeCall))) { +            new_top_node_p = kv_pop(ast_stack); +          } +          if (new_top_node_p != NULL +              && ((*new_top_node_p)->type == kExprNodeNested +                  || (*new_top_node_p)->type == kExprNodeCall)) { +            if ((*new_top_node_p)->type == kExprNodeNested) { +              HL_CUR_TOKEN(NestingParenthesis); +            } else { +              HL_CUR_TOKEN(CallingParenthesis); +            } +          } else { +            // “Always drop the topmost value” branch has got rid of the single +            // value stack had, so there is nothing known to enclose. Correct +            // this. +            if (new_top_node_p == NULL) { +              new_top_node_p = top_node_p; +            } +            ERROR_FROM_TOKEN_AND_MSG( +                cur_token, _("E15: Unexpected closing parenthesis: %.*s")); +            HL_CUR_TOKEN(NestingParenthesis); +            cur_node = NEW_NODE(kExprNodeNested); +            cur_node->start = cur_token.start; +            cur_node->len = 0; +            // Unexpected closing parenthesis, assume that it was wanted to +            // enclose everything in (). 
+            cur_node->children = *new_top_node_p; +            *new_top_node_p = cur_node; +            assert(cur_node->next == NULL); +          } +          kvi_push(ast_stack, new_top_node_p); +          want_node = kENodeOperator; +        } else { +          if (want_node == kENodeValue) { +            NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeNested); +            *top_node_p = cur_node; +            kvi_push(ast_stack, &cur_node->children); +            HL_CUR_TOKEN(NestingParenthesis); +          } else if (want_node == kENodeOperator) { +            if (prev_token.type == kExprLexSpacing) { +              // For some reason "function (args)" is a function call, but +              // "(funcref) (args)" is not. AFAIR this somehow involves +              // compatibility and Bram was commenting that this is +              // intentionally inconsistent and he is not very happy with the +              // situation himself. +              if ((*top_node_p)->type != kExprNodePlainIdentifier +                  && (*top_node_p)->type != kExprNodeComplexIdentifier +                  && (*top_node_p)->type != kExprNodeCurlyBracesIdentifier) { +                OP_MISSING; +              } +            } +            NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeCall); +            ADD_OP_NODE(cur_node); +            HL_CUR_TOKEN(CallingParenthesis); +          } else { +            // Currently it is impossible to reach this. 
+            assert(false); +          } +          want_node = kENodeValue; +        } +        break; +      } +      case kExprLexQuestion: { +        ADD_VALUE_IF_MISSING(_("E15: Expected value, got question mark: %.*s")); +        NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeTernary); +        ADD_OP_NODE(cur_node); +        HL_CUR_TOKEN(Ternary); +        ExprASTNode *ter_val_node; +        NEW_NODE_WITH_CUR_POS(ter_val_node, kExprNodeTernaryValue); +        ter_val_node->data.ter.got_colon = false; +        assert(cur_node->children != NULL); +        assert(cur_node->children->next == NULL); +        assert(kv_last(ast_stack) == &cur_node->children->next); +        *kv_last(ast_stack) = ter_val_node; +        kvi_push(ast_stack, &ter_val_node->children); +        break; +      } +      case kExprLexDoubleQuotedString: +      case kExprLexSingleQuotedString: { +        const bool is_double = (tok_type == kExprLexDoubleQuotedString); +        if (!cur_token.data.str.closed) { +          // It is weird, but Vim has two identical errors messages with +          // different error numbers: "E114: Missing quote" and +          // "E115: Missing quote". +          ERROR_FROM_TOKEN_AND_MSG( +              cur_token, (is_double +                          ? _("E114: Missing double quote: %.*s") +                          : _("E115: Missing single quote: %.*s"))); +        } +        if (want_node == kENodeOperator) { +          OP_MISSING; +        } +        NEW_NODE_WITH_CUR_POS( +            cur_node, (is_double +                       ? 
kExprNodeDoubleQuotedString +                       : kExprNodeSingleQuotedString)); +        *top_node_p = cur_node; +        parse_quoted_string(pstate, cur_node, cur_token, ast_stack, is_invalid); +        want_node = kENodeOperator; +        break; +      } +      case kExprLexAssignment: { +        if (cur_pt == kEPTAssignment) { +          kv_drop(pt_stack, 1); +        } else if (cur_pt == kEPTSingleAssignment) { +          kv_drop(pt_stack, 2); +          ERROR_FROM_TOKEN_AND_MSG( +              cur_token, +              _("E475: Expected closing bracket to end list assignment " +                "lvalue: %.*s")); +        } else { +          ERROR_FROM_TOKEN_AND_MSG( +              cur_token, _("E15: Misplaced assignment: %.*s")); +        } +        assert(kv_size(pt_stack)); +        assert(kv_last(pt_stack) == kEPTExpr); +        ADD_VALUE_IF_MISSING(_("E15: Unexpected assignment: %.*s")); +        NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeAssignment); +        cur_node->data.ass.type = cur_token.data.ass.type; +        switch (cur_token.data.ass.type) { +#define HL_ASGN(asgn, hl) \ +          case kExprAsgn##asgn: { HL_CUR_TOKEN(hl); break; } +          HL_ASGN(Plain, PlainAssignment) +          HL_ASGN(Add, AssignmentWithAddition) +          HL_ASGN(Subtract, AssignmentWithSubtraction) +          HL_ASGN(Concat, AssignmentWithConcatenation) +#undef HL_ASGN +        } +        ADD_OP_NODE(cur_node); +        break; +      } +    } +viml_pexpr_parse_cycle_end: +    prev_token = cur_token; +    highlighted_prev_spacing = false; +    viml_parser_advance(pstate, cur_token.len); +  } while (true); +viml_pexpr_parse_end: +  assert(kv_size(pt_stack)); +  assert(kv_size(ast_stack)); +  if (want_node == kENodeValue +      // Blacklist some parse type entries as their presence means better error +      // message in the other branch. 
+      && kv_last(pt_stack) != kEPTLambdaArguments) { +    east_set_error(pstate, &ast.err, _("E15: Expected value, got EOC: %.*s"), +                   pstate->pos); +  } else if (kv_size(ast_stack) != 1) { +    // Something may be wrong, check whether it really is. + +    // Pointer to ast.root must never be dropped, so “!= 1” is expected to be +    // the same as “> 1”. +    assert(kv_size(ast_stack)); +    // Topmost stack item must be a *finished* value, so it must not be +    // analyzed. E.g. it may contain an already finished nested expression. +    kv_drop(ast_stack, 1); +    while (ast.err.msg == NULL && kv_size(ast_stack)) { +      const ExprASTNode *const cur_node = (*kv_pop(ast_stack)); +      // This should only happen when want_node == kENodeValue. +      assert(cur_node != NULL); +      // TODO(ZyX-I): Rehighlight as invalid? +      switch (cur_node->type) { +        case kExprNodeOpMissing: +        case kExprNodeMissing: { +          // Error should’ve been already reported. +          break; +        } +        case kExprNodeCall: { +          east_set_error( +              pstate, &ast.err, +              _("E116: Missing closing parenthesis for function call: %.*s"), +              cur_node->start); +          break; +        } +        case kExprNodeNested: { +          east_set_error( +              pstate, &ast.err, +              _("E110: Missing closing parenthesis for nested expression" +                ": %.*s"), +              cur_node->start); +          break; +        } +        case kExprNodeListLiteral: { +          // For whatever reason "[1" yields "E696: Missing comma in list" error +          // in Vim while "[1," yields E697. 
+          east_set_error( +              pstate, &ast.err, +              _("E697: Missing end of List ']': %.*s"), +              cur_node->start); +          break; +        } +        case kExprNodeDictLiteral: { +          // Same problem like with list literal with E722 (missing comma) vs +          // E723, but additionally just "{" yields only E15. +          east_set_error( +              pstate, &ast.err, +              _("E723: Missing end of Dictionary '}': %.*s"), +              cur_node->start); +          break; +        } +        case kExprNodeUnknownFigure: { +          east_set_error( +              pstate, &ast.err, +              _("E15: Missing closing figure brace: %.*s"), +              cur_node->start); +          break; +        } +        case kExprNodeLambda: { +          east_set_error( +              pstate, &ast.err, +              _("E15: Missing closing figure brace for lambda: %.*s"), +              cur_node->start); +          break; +        } +        case kExprNodeCurlyBracesIdentifier: { +          // Until trailing "}" it is impossible to distinguish curly braces +          // identifier and dictionary, so it must not appear in the stack like +          // this. +          assert(false); +        } +        case kExprNodeInteger: +        case kExprNodeFloat: +        case kExprNodeSingleQuotedString: +        case kExprNodeDoubleQuotedString: +        case kExprNodeOption: +        case kExprNodeEnvironment: +        case kExprNodeRegister: +        case kExprNodePlainIdentifier: +        case kExprNodePlainKey: { +          // These are plain values and not containers, for them it should only +          // be possible to show up in the topmost stack element, but it was +          // unconditionally popped at the start. 
+          assert(false); +        } +        case kExprNodeComma: +        case kExprNodeColon: +        case kExprNodeArrow: { +          // It is actually only valid inside something else, but everything +          // where one of the above is valid requires to be closed and thus is +          // to be caught later. +          break; +        } +        case kExprNodeSubscript: +        case kExprNodeConcatOrSubscript: +        case kExprNodeComplexIdentifier: +        case kExprNodeAssignment: +        case kExprNodeMod: +        case kExprNodeDivision: +        case kExprNodeMultiplication: +        case kExprNodeNot: +        case kExprNodeAnd: +        case kExprNodeOr: +        case kExprNodeConcat: +        case kExprNodeComparison: +        case kExprNodeUnaryMinus: +        case kExprNodeUnaryPlus: +        case kExprNodeBinaryMinus: +        case kExprNodeTernary: +        case kExprNodeBinaryPlus: { +          // It is OK to see these in the stack. +          break; +        } +        case kExprNodeTernaryValue: { +          if (!cur_node->data.ter.got_colon) { +            // Actually Vim throws E109 in more cases. 
+            east_set_error( +                pstate, &ast.err, _("E109: Missing ':' after '?': %.*s"), +                cur_node->start); +          } +          break; +        } +      } +    } +  } +  kvi_destroy(ast_stack); +  return ast; +}  // NOLINT(readability/fn_size) + +#undef NEW_NODE +#undef HL diff --git a/src/nvim/viml/parser/expressions.h b/src/nvim/viml/parser/expressions.h new file mode 100644 index 0000000000..23e172da75 --- /dev/null +++ b/src/nvim/viml/parser/expressions.h @@ -0,0 +1,389 @@ +#ifndef NVIM_VIML_PARSER_EXPRESSIONS_H +#define NVIM_VIML_PARSER_EXPRESSIONS_H + +#include <stddef.h> +#include <stdint.h> +#include <stdbool.h> + +#include "nvim/types.h" +#include "nvim/viml/parser/parser.h" +#include "nvim/eval/typval.h" + +// Defines whether to ignore case: +//    ==   kCCStrategyUseOption +//    ==#  kCCStrategyMatchCase +//    ==?  kCCStrategyIgnoreCase +typedef enum { +  kCCStrategyUseOption = 0,  // 0 for xcalloc +  kCCStrategyMatchCase = '#', +  kCCStrategyIgnoreCase = '?', +} ExprCaseCompareStrategy; + +/// Lexer token type +typedef enum { +  kExprLexInvalid = 0,  ///< Invalid token, indicates an error. +  kExprLexMissing,  ///< Missing token, for use in parser. +  kExprLexSpacing,  ///< Spaces, tabs, newlines, etc. +  kExprLexEOC,  ///< End of command character: NL, | or just end of stream. + +  kExprLexQuestion,  ///< Question mark, for use in ternary. +  kExprLexColon,  ///< Colon, for use in ternary. +  kExprLexOr,  ///< Logical or operator. +  kExprLexAnd,  ///< Logical and operator. +  kExprLexComparison,  ///< One of the comparison operators. +  kExprLexPlus,  ///< Plus sign. +  kExprLexMinus,  ///< Minus sign. +  kExprLexDot,  ///< Dot: either concat or subscript, also part of the float. +  kExprLexMultiplication,  ///< Multiplication, division or modulo operator. + +  kExprLexNot,  ///< Not: !. + +  kExprLexNumber,  ///< Integer number literal, or part of a float. 
+  kExprLexSingleQuotedString,  ///< Single quoted string literal. +  kExprLexDoubleQuotedString,  ///< Double quoted string literal. +  kExprLexOption,  ///< &optionname option value. +  kExprLexRegister,  ///< @r register value. +  kExprLexEnv,  ///< Environment $variable value. +  /// Identifier without curly braces: `abc`, `foo#bar`, `g:Foo` +  /// +  /// Scope prefix, if any, is a part of the token and is reported in +  /// `data.var.scope`. +  kExprLexPlainIdentifier, + +  kExprLexBracket,  ///< Bracket, either opening or closing. +  kExprLexFigureBrace,  ///< Figure brace, either opening or closing. +  kExprLexParenthesis,  ///< Parenthesis, either opening or closing. +  kExprLexComma,  ///< Comma. +  kExprLexArrow,  ///< Arrow, like from lambda expressions. +  kExprLexAssignment,  ///< Assignment: `=` or `{op}=`. +  // XXX When modifying this enum you need to also modify eltkn_type_tab in +  //     expressions.c and tests and, possibly, viml_pexpr_repr_token. +} LexExprTokenType; + +/// Comparison operator type +/// +/// Each value stands for an operator and its negation, which one is meant is +/// stored separately in the `inv` flag kept next to the type. +typedef enum { +  kExprCmpEqual,  ///< Equality, inequality. +  kExprCmpMatches,  ///< Matches regex, not matches regex. +  kExprCmpGreater,  ///< `>` or `<=`. +  kExprCmpGreaterOrEqual,  ///< `>=` or `<`. +  kExprCmpIdentical,  ///< `is` or `isnot`. +} ExprComparisonType; + +/// All possible option scopes +typedef enum { +  kExprOptScopeUnspecified = 0, +  kExprOptScopeGlobal = 'g', +  kExprOptScopeLocal = 'l', +} ExprOptScope; + +/// All possible assignment types: `=` and `{op}=`. +typedef enum { +  kExprAsgnPlain = 0,  ///< Plain assignment: `=`. +  kExprAsgnAdd,  ///< Assignment augmented with addition: `+=`. +  kExprAsgnSubtract,  ///< Assignment augmented with subtraction: `-=`. +  kExprAsgnConcat,  ///< Assignment augmented with concatenation: `.=`. 
+} ExprAssignmentType; + +/// Array with characters of all explicitly specified option scopes +#define EXPR_OPT_SCOPE_LIST \ +    ((char[]){ kExprOptScopeGlobal, kExprOptScopeLocal }) + +/// All possible variable scopes +typedef enum { +  kExprVarScopeMissing = 0, +  kExprVarScopeScript = 's', +  kExprVarScopeGlobal = 'g', +  kExprVarScopeVim = 'v', +  kExprVarScopeBuffer = 'b', +  kExprVarScopeWindow = 'w', +  kExprVarScopeTabpage = 't', +  kExprVarScopeLocal = 'l', +  kExprVarScopeArguments = 'a', +} ExprVarScope; + +/// Array with characters of all explicitly specified variable scopes +/// +/// Contains each ExprVarScope value except kExprVarScopeMissing exactly once. +#define EXPR_VAR_SCOPE_LIST \ +    ((char[]) { \ +        kExprVarScopeScript, kExprVarScopeGlobal, kExprVarScopeVim, \ +        kExprVarScopeBuffer, kExprVarScopeWindow, kExprVarScopeTabpage, \ +        kExprVarScopeLocal, kExprVarScopeArguments, \ +    }) + +/// Lexer token +typedef struct { +  ParserPosition start;  ///< Token start position. +  size_t len;  ///< Token length in bytes. +  LexExprTokenType type;  ///< Token type, selects valid member of `data`. +  union { +    struct { +      ExprComparisonType type;  ///< Comparison type. +      ExprCaseCompareStrategy ccs;  ///< Case comparison strategy. +      bool inv;  ///< True if comparison is to be inverted. +    } cmp;  ///< For kExprLexComparison. + +    struct { +      enum { +        kExprLexMulMul,  ///< Real multiplication. +        kExprLexMulDiv,  ///< Division. +        kExprLexMulMod,  ///< Modulo. +      } type;  ///< Multiplication type. +    } mul;  ///< For kExprLexMultiplication. + +    struct { +      bool closing;  ///< True if bracket/etc is a closing one. +    } brc;  ///< For brackets/braces/parenthesis. + +    struct { +      int name;  ///< Register name, may be -1 if name not present. +    } reg;  ///< For kExprLexRegister. + +    struct { +      bool closed;  ///< True if quote was closed. +    } str;  ///< For kExprLexSingleQuotedString and kExprLexDoubleQuotedString. + +    struct { +      const char *name;  ///< Option name start. +      size_t len;  ///< Option name length. +      ExprOptScope scope;  ///< Option scope: &l:, &g: or not specified. +    } opt;  ///< Option properties. 
+ +    struct { +      ExprVarScope scope;  ///< Scope character or 0 if not present. +      bool autoload;  ///< Has autoload characters. +    } var;  ///< For kExprLexPlainIdentifier. + +    struct { +      LexExprTokenType type;  ///< Suggested type for parsing incorrect code. +      const char *msg;  ///< Error message. +    } err;  ///< For kExprLexInvalid. + +    struct { +      union { +        float_T floating;  ///< Used when is_float is true. +        uvarnumber_T integer;  ///< Used when is_float is false. +      } val;  ///< Number value. +      uint8_t base;  ///< Base: 2, 8, 10 or 16. +      bool is_float;  ///< True if number is a floating-point. +    } num;  ///< For kExprLexNumber. + +    struct { +      ExprAssignmentType type;  ///< Assignment type. +    } ass;  ///< For kExprLexAssignment. +  } data;  ///< Additional data, if needed. +} LexExprToken; + +/// Expression lexer flags +typedef enum { +  /// If set, “pointer” to the current byte in pstate will not be shifted +  kELFlagPeek = (1 << 0), +  /// Determines whether scope is allowed to come before the identifier +  kELFlagForbidScope = (1 << 1), +  /// Determines whether floating-point numbers are allowed +  /// +  /// I.e. whether dot is a decimal point separator or is not a part of +  /// a number at all. +  kELFlagAllowFloat = (1 << 2), +  /// Determines whether `is` and `isnot` are seen as comparison operators +  /// +  /// If set they are supposed to be just regular identifiers. +  kELFlagIsNotCmp = (1 << 3), +  /// Determines whether EOC tokens are allowed +  /// +  /// If set then it will yield Invalid token with E15 in place of EOC one if +  /// “EOC” is something like "|". It is fine with emitting EOC at the end of +  /// string still, with or without this flag set. +  kELFlagForbidEOC = (1 << 4), +  // XXX Whenever you add a new flag, alter klee_assume() statement in +  //     viml_expressions_lexer.c. +} LexExprFlags; + +/// Expression AST node type +typedef enum { +  kExprNodeMissing = 0, +  kExprNodeOpMissing, +  kExprNodeTernary,  ///< Ternary operator. 
+  kExprNodeTernaryValue,  ///< Ternary operator, colon. +  kExprNodeRegister,  ///< Register. +  kExprNodeSubscript,  ///< Subscript. +  kExprNodeListLiteral,  ///< List literal. +  kExprNodeUnaryPlus,  ///< Unary plus. +  kExprNodeBinaryPlus,  ///< Binary plus. +  kExprNodeNested,  ///< Nested parenthesised expression. +  kExprNodeCall,  ///< Function call. +  /// Plain identifier: simple variable/function name +  /// +  /// Looks like "string", "g:Foo", etc: consists of a single +  /// kExprLexPlainIdentifier token. +  kExprNodePlainIdentifier, +  /// Plain dictionary key, for use with kExprNodeConcatOrSubscript +  kExprNodePlainKey, +  /// Complex identifier: variable/function name with curly braces +  kExprNodeComplexIdentifier, +  /// Figure brace expression whose kind is not yet known +  /// +  /// May resolve to any of kExprNodeDictLiteral, kExprNodeLambda or +  /// kExprNodeCurlyBracesIdentifier. +  kExprNodeUnknownFigure, +  kExprNodeLambda,  ///< Lambda. +  kExprNodeDictLiteral,  ///< Dictionary literal. +  kExprNodeCurlyBracesIdentifier,  ///< Part of the curly braces name. +  kExprNodeComma,  ///< Comma “operator”. +  kExprNodeColon,  ///< Colon “operator”. +  kExprNodeArrow,  ///< Arrow “operator”. +  kExprNodeComparison,  ///< Various comparison operators. +  /// Concat operator +  /// +  /// To be only used in cases when it is known for sure it is not a subscript. +  kExprNodeConcat, +  /// Concat or subscript operator +  /// +  /// For cases when it is not obvious whether expression is a concat or +  /// a subscript. May only have either number or plain identifier as the second +  /// child. To make it easier to avoid curly braces in place of +  /// kExprNodePlainIdentifier node kExprNodePlainKey is used. +  kExprNodeConcatOrSubscript, +  kExprNodeInteger,  ///< Integral number. +  kExprNodeFloat,  ///< Floating-point number. 
+  kExprNodeSingleQuotedString, +  kExprNodeDoubleQuotedString, +  kExprNodeOr, +  kExprNodeAnd, +  kExprNodeUnaryMinus, +  kExprNodeBinaryMinus, +  kExprNodeNot, +  kExprNodeMultiplication, +  kExprNodeDivision, +  kExprNodeMod, +  kExprNodeOption, +  kExprNodeEnvironment, +  kExprNodeAssignment, +  // XXX When modifying this list also modify east_node_type_tab both in parser +  //     and in tests, and you most likely will also have to alter list of +  //     highlight groups stored in highlight_init_cmdline variable. +} ExprASTNodeType; + +typedef struct expr_ast_node ExprASTNode; + +/// Structure representing one AST node +struct expr_ast_node { +  ExprASTNodeType type;  ///< Node type. +  /// Node children: e.g. for 1 + 2 nodes 1 and 2 will be children of +. +  ExprASTNode *children; +  /// Next node: e.g. for 1 + 2 child nodes 1 and 2 are put into a singly-linked +  /// list: `(+)->children` references only node 1, node 2 is in +  /// `(+)->children->next`. +  ExprASTNode *next; +  ParserPosition start;  ///< Node start position. +  size_t len;  ///< Node length, zero for nodes inserted by error recovery. +  union { +    struct { +      int name;  ///< Register name, may be -1 if name not present. +    } reg;  ///< For kExprNodeRegister. +    struct { +      /// Which nodes UnknownFigure may still represent. +      /// +      /// A flag set to false rules the corresponding node type out. +      struct { +        /// True if UnknownFigure may actually represent dictionary literal. +        bool allow_dict; +        /// True if UnknownFigure may actually represent lambda. +        bool allow_lambda; +        /// True if UnknownFigure may actually be part of curly braces name. +        bool allow_ident; +      } type_guesses; +      /// Highlight chunk index, used for rehighlighting if needed +      size_t opening_hl_idx; +    } fig;  ///< For kExprNodeUnknownFigure, also filled in for +            ///< kExprNodeCurlyBracesIdentifier. +    struct { +      ExprVarScope scope;  ///< Scope character or 0 if not present. +      /// Actual identifier without scope. +      /// +      /// Points to inside parser reader state. 
+      const char *ident; +      size_t ident_len;  ///< Actual identifier length. +    } var;  ///< For kExprNodePlainIdentifier and kExprNodePlainKey. +    struct { +      bool got_colon;  ///< True if colon was seen. +    } ter;  ///< For kExprNodeTernaryValue. +    struct { +      ExprComparisonType type;  ///< Comparison type. +      ExprCaseCompareStrategy ccs;  ///< Case comparison strategy. +      bool inv;  ///< True if comparison is to be inverted. +    } cmp;  ///< For kExprNodeComparison. +    struct { +      uvarnumber_T value;  ///< Integer value. +    } num;  ///< For kExprNodeInteger. +    struct { +      float_T value;  ///< Floating-point value. +    } flt;  ///< For kExprNodeFloat. +    struct { +      char *value; +      size_t size; +    } str;  ///< For kExprNodeSingleQuotedString and +            ///< kExprNodeDoubleQuotedString. +    struct { +      const char *ident;  ///< Option name start. +      size_t ident_len;  ///< Option name length. +      ExprOptScope scope;  ///< Option scope: &l:, &g: or not specified. +    } opt;  ///< For kExprNodeOption. +    struct { +      const char *ident;  ///< Environment variable name start. +      size_t ident_len;  ///< Environment variable name length. +    } env;  ///< For kExprNodeEnvironment. +    struct { +      ExprAssignmentType type;  ///< Assignment type. +    } ass;  ///< For kExprNodeAssignment. +  } data; +}; + +/// Expression parser flags +typedef enum { +  /// Allow multiple expressions in a row: e.g. for :echo +  /// +  /// Parser will still parse only one of them though. +  kExprFlagsMulti = (1 << 0), +  /// Disallow NL, NUL and bar to be EOC +  /// +  /// When parsing expressions input by user bar is assumed to be a binary +  /// operator and other two are spacings. +  kExprFlagsDisallowEOC = (1 << 1), +  /// Parse :let argument +  /// +  /// That means that top level node must be an assignment and first nodes +  /// belong to lvalues. 
+  kExprFlagsParseLet = (1 << 2), +  // XXX whenever you add a new flag, alter klee_assume() statement in +  //     viml_expressions_parser.c, nvim_parse_expression() flags parsing +  //     alongside with its documentation and flag sets in check_parsing() +  //     function in expressions parser functional and unit tests. +} ExprParserFlags; + +/// AST error definition +typedef struct { +  /// Error message. Must contain a single printf format atom: %.*s. +  const char *msg; +  /// Error message argument: points to the location of the error. +  const char *arg; +  /// Message argument length: length till the end of string. +  int arg_len; +} ExprASTError; + +/// Structure representing complety AST for one expression +typedef struct { +  /// When AST is not correct this message will be printed. +  /// +  /// Uses `emsgf(msg, arg_len, arg);`, `msg` is assumed to contain only `%.*s`. +  ExprASTError err; +  /// Root node of the AST. +  ExprASTNode *root; +} ExprAST; + +/// Array mapping ExprASTNodeType to maximum amount of children node may have +extern const uint8_t node_maxchildren[]; + +/// Array mapping ExprASTNodeType values to their stringified versions +extern const char *const east_node_type_tab[]; + +/// Array mapping ExprComparisonType values to their stringified versions +extern const char *const eltkn_cmp_type_tab[]; + +/// Array mapping ExprCaseCompareStrategy values to their stringified versions +extern const char *const ccs_tab[]; + +/// Array mapping ExprAssignmentType values to their stringified versions +extern const char *const expr_asgn_type_tab[]; + +#ifdef INCLUDE_GENERATED_DECLARATIONS +# include "viml/parser/expressions.h.generated.h" +#endif + +#endif  // NVIM_VIML_PARSER_EXPRESSIONS_H diff --git a/src/nvim/viml/parser/parser.c b/src/nvim/viml/parser/parser.c new file mode 100644 index 0000000000..8d26d08ea7 --- /dev/null +++ b/src/nvim/viml/parser/parser.c @@ -0,0 +1,16 @@ +// This is an open source non-commercial project. 
Dear PVS-Studio, please check +// it. PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com + +#include "nvim/viml/parser/parser.h" + +#ifdef INCLUDE_GENERATED_DECLARATIONS +# include "viml/parser/parser.c.generated.h" +#endif + + +/// Line getter which reads lines from an array of ParserLine +/// +/// @param[in,out]  cookie  Pointer to a `ParserLine *` cursor. The line the +///                         cursor points to is returned and the cursor is +///                         advanced to the next array element. +/// @param[out]  ret_pline  Where to save the line. +void parser_simple_get_line(void *cookie, ParserLine *ret_pline) +{ +  ParserLine **plines_p = (ParserLine **)cookie; +  *ret_pline = **plines_p; +  (*plines_p)++; +} diff --git a/src/nvim/viml/parser/parser.h b/src/nvim/viml/parser/parser.h new file mode 100644 index 0000000000..7ac49709d8 --- /dev/null +++ b/src/nvim/viml/parser/parser.h @@ -0,0 +1,244 @@ +#ifndef NVIM_VIML_PARSER_PARSER_H +#define NVIM_VIML_PARSER_PARSER_H + +#include <stdbool.h> +#include <stddef.h> +#include <assert.h> + +#include "nvim/lib/kvec.h" +#include "nvim/func_attr.h" +#include "nvim/mbyte.h" +#include "nvim/memory.h" + +/// One parsed line +typedef struct { +  const char *data;  ///< Parsed line pointer +  size_t size;  ///< Parsed line size +  bool allocated;  ///< True if line may be freed. +} ParserLine; + +/// Line getter type for parser +/// +/// Line getter must return {NULL, 0} for EOF. +typedef void (*ParserLineGetter)(void *cookie, ParserLine *ret_pline); + +/// Parser position in the input +typedef struct { +  size_t line;  ///< Line index in ParserInputReader.lines. +  size_t col;  ///< Byte index in the line. +} ParserPosition; + +/// Parser state item. +typedef struct { +  enum { +    kPTopStateParsingCommand = 0, +    kPTopStateParsingExpression, +  } type; +  union { +    struct { +      enum { +        kExprUnknown = 0, +      } type; +    } expr; +  } data; +} ParserStateItem; + +/// Structure defining input reader +typedef struct { +  /// Function used to get next line. +  ParserLineGetter get_line; +  /// Data for get_line function. +  void *cookie; +  /// All lines obtained by get_line. +  kvec_withinit_t(ParserLine, 4) lines; +  /// Conversion, for :scriptencoding. 
+  vimconv_T conv; +} ParserInputReader; + +/// Highlighted region definition +/// +/// Note: one chunk may highlight only one line. +typedef struct { +  ParserPosition start;  ///< Start of the highlight: line and column. +  size_t end_col;  ///< End column, points to the start of the next character. +  const char *group;  ///< Highlight group. +} ParserHighlightChunk; + +/// Highlighting defined by a parser +typedef kvec_withinit_t(ParserHighlightChunk, 16) ParserHighlight; + +/// Structure defining parser state +typedef struct { +  /// Line reader. +  ParserInputReader reader; +  /// Position up to which input was parsed. +  ParserPosition pos; +  /// Parser state stack. +  kvec_withinit_t(ParserStateItem, 16) stack; +  /// Highlighting support. May be NULL, in which case no highlighting is +  /// recorded. +  ParserHighlight *colors; +  /// True if line continuation can be used. +  bool can_continuate; +} ParserState; + +static inline void viml_parser_init( +    ParserState *const ret_pstate, +    const ParserLineGetter get_line, void *const cookie, +    ParserHighlight *const colors) +  REAL_FATTR_ALWAYS_INLINE REAL_FATTR_NONNULL_ARG(1, 2); + +/// Initialize a new parser state instance +/// +/// @param[out]  ret_pstate  Parser state to initialize. +/// @param[in]  get_line  Line getter function. +/// @param[in]  cookie  Argument for the get_line function. +/// @param[in]  colors  Where to save highlighting. May be NULL if it is not +///                     needed. 
+static inline void viml_parser_init( +    ParserState *const ret_pstate, +    const ParserLineGetter get_line, void *const cookie, +    ParserHighlight *const colors) +{ +  *ret_pstate = (ParserState) { +    .reader = { +      .get_line = get_line, +      .cookie = cookie, +      .conv = MBYTE_NONE_CONV, +    }, +    .pos = { 0, 0 }, +    .colors = colors, +    .can_continuate = false, +  }; +  kvi_init(ret_pstate->reader.lines); +  kvi_init(ret_pstate->stack); +} + +static inline void viml_parser_destroy(ParserState *const pstate) +  REAL_FATTR_NONNULL_ALL REAL_FATTR_ALWAYS_INLINE; + +/// Free all memory allocated by the parser on heap +/// +/// Frees data of every recorded line marked as allocated, then the line and +/// state stack storage. Does not free pstate itself or pstate->colors. +/// +/// @param  pstate  Parser state to free. +static inline void viml_parser_destroy(ParserState *const pstate) +{ +  for (size_t i = 0; i < kv_size(pstate->reader.lines); i++) { +    ParserLine pline = kv_A(pstate->reader.lines, i); +    if (pline.allocated) { +      xfree((void *)pline.data); +    } +  } +  kvi_destroy(pstate->reader.lines); +  kvi_destroy(pstate->stack); +} + +static inline void viml_preader_get_line(ParserInputReader *const preader, +                                         ParserLine *const ret_pline) +  REAL_FATTR_NONNULL_ALL; + +/// Get one line from ParserInputReader +/// +/// Non-empty lines are converted using preader->conv when conversion is set +/// up. The resulting line is recorded in preader->lines. +/// +/// @param  preader  Reader to get the line from. +/// @param[out]  ret_pline  Where to save the obtained line. +static inline void viml_preader_get_line(ParserInputReader *const preader, +                                         ParserLine *const ret_pline) +{ +  ParserLine pline; +  preader->get_line(preader->cookie, &pline); +  if (preader->conv.vc_type != CONV_NONE && pline.size) { +    ParserLine cpline = { +      .allocated = true, +      .size = pline.size, +    }; +    cpline.data = (char *)string_convert(&preader->conv, +                                         (char_u *)pline.data, +                                         &cpline.size); +    // string_convert() may yield NULL when conversion fails. NULL data means +    // EOF to the callers, so keep the unconverted line in this case instead +    // of freeing it and silently ending the input. +    if (cpline.data != NULL) { +      if (pline.allocated) { +        xfree((void *)pline.data); +      } +      pline = cpline; +    } +  } +  kvi_push(preader->lines, pline); +  *ret_pline = pline; +} + +static inline bool 
viml_parser_get_remaining_line(ParserState *const pstate, +                                                  ParserLine *const ret_pline) +  REAL_FATTR_ALWAYS_INLINE REAL_FATTR_WARN_UNUSED_RESULT REAL_FATTR_NONNULL_ALL; + +/// Get currently parsed line, shifted to pstate->pos.col +/// +/// A new line is requested from the reader only when pstate->pos.line points +/// past the lines obtained so far. +/// +/// @param  pstate  Parser state to operate on. +/// @param[out]  ret_pline  Where to save the remaining part of the line. Data +///                         is NULL in case of EOF. +/// +/// @return True if there is a line, false in case of EOF. +static inline bool viml_parser_get_remaining_line(ParserState *const pstate, +                                                  ParserLine *const ret_pline) +{ +  const size_t num_lines = kv_size(pstate->reader.lines); +  if (pstate->pos.line == num_lines) { +    viml_preader_get_line(&pstate->reader, ret_pline); +  } else { +    *ret_pline = kv_last(pstate->reader.lines); +  } +  assert(pstate->pos.line == kv_size(pstate->reader.lines) - 1); +  if (ret_pline->data != NULL) { +    ret_pline->data += pstate->pos.col; +    ret_pline->size -= pstate->pos.col; +  } +  return ret_pline->data != NULL; +} + +static inline void viml_parser_advance(ParserState *const pstate, +                                       const size_t len) +  REAL_FATTR_ALWAYS_INLINE REAL_FATTR_NONNULL_ALL; + +/// Advance position by a given number of bytes +/// +/// At maximum advances to the next line. +/// +/// @param  pstate  Parser state to advance. +/// @param[in]  len  Number of bytes to advance. 
+static inline void viml_parser_advance(ParserState *const pstate, +                                       const size_t len) +{ +  assert(pstate->pos.line == kv_size(pstate->reader.lines) - 1); +  const ParserLine pline = kv_last(pstate->reader.lines); +  // Reaching or passing the end of the current line moves to the start of the +  // next line, any excess in len is discarded. +  if (pstate->pos.col + len >= pline.size) { +    pstate->pos.line++; +    pstate->pos.col = 0; +  } else { +    pstate->pos.col += len; +  } +} + +static inline void viml_parser_highlight(ParserState *const pstate, +                                         const ParserPosition start, +                                         const size_t len, +                                         const char *const group) +  REAL_FATTR_ALWAYS_INLINE REAL_FATTR_NONNULL_ALL; + +/// Record highlighting of some region of text +/// +/// Does nothing if highlighting is not requested (pstate->colors is NULL) or +/// if the region is empty. +/// +/// @param  pstate  Parser state to work with. +/// @param[in]  start  Start position of the highlight. +/// @param[in]  len  Highlighting chunk length. +/// @param[in]  group  Highlight group. +static inline void viml_parser_highlight(ParserState *const pstate, +                                         const ParserPosition start, +                                         const size_t len, +                                         const char *const group) +{ +  if (pstate->colors == NULL || len == 0) { +    return; +  } +  // New chunk must not start before the end of the chunk checked here +  // (presumably the most recently recorded one). +  assert(kv_size(*pstate->colors) == 0 +         || kv_Z(*pstate->colors, 0).start.line < start.line +         || kv_Z(*pstate->colors, 0).end_col <= start.col); +  kvi_push(*pstate->colors, ((ParserHighlightChunk) { +      .start = start, +      .end_col = start.col + len, +      .group = group, +  })); +} + +#ifdef INCLUDE_GENERATED_DECLARATIONS +# include "viml/parser/parser.h.generated.h" +#endif + +#endif  // NVIM_VIML_PARSER_PARSER_H | 
