From 0300c4d10991fb6ce218d45c4fe6d71a73f07d62 Mon Sep 17 00:00:00 2001 From: ZyX Date: Sun, 20 Aug 2017 18:40:22 +0300 Subject: viml/expressions: Add lexer with some basic tests --- src/nvim/viml/parser/expressions.c | 367 +++++++++++++++++++++++++++++++++++++ src/nvim/viml/parser/expressions.h | 118 ++++++++++++ src/nvim/viml/parser/parser.h | 129 +++++++++++++ 3 files changed, 614 insertions(+) create mode 100644 src/nvim/viml/parser/expressions.c create mode 100644 src/nvim/viml/parser/expressions.h create mode 100644 src/nvim/viml/parser/parser.h (limited to 'src/nvim/viml/parser') diff --git a/src/nvim/viml/parser/expressions.c b/src/nvim/viml/parser/expressions.c new file mode 100644 index 0000000000..0164de3a14 --- /dev/null +++ b/src/nvim/viml/parser/expressions.c @@ -0,0 +1,367 @@ +// This is an open source non-commercial project. Dear PVS-Studio, please check +// it. PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com + +/// VimL expression parser + +#include <stdbool.h> +#include <stddef.h> +#include <assert.h> +#include <string.h> + +#include "nvim/vim.h" +#include "nvim/memory.h" +#include "nvim/types.h" +#include "nvim/charset.h" +#include "nvim/ascii.h" + +#include "nvim/viml/parser/expressions.h" +#include "nvim/viml/parser/parser.h" + +#ifdef INCLUDE_GENERATED_DECLARATIONS +# include "viml/parser/expressions.c.generated.h" +#endif + +/// Character used as a separator in autoload function/variable names. 
+#define AUTOLOAD_CHAR '#' + +/// Get next token for the VimL expression input +LexExprToken viml_pexpr_next_token(ParserState *const pstate) + FUNC_ATTR_WARN_UNUSED_RESULT +{ + LexExprToken ret = { + .type = kExprLexInvalid, + .start = pstate->pos, + }; + ParserLine pline; + if (!viml_parser_get_remaining_line(pstate, &pline)) { + ret.type = kExprLexEOC; + return ret; + } + if (pline.size <= 0) { + ret.len = 0; + ret.type = kExprLexEOC; + goto viml_pexpr_next_token_adv_return; + } + ret.len = 1; + const uint8_t schar = (uint8_t)pline.data[0]; +#define GET_CCS(ret, pline) \ + do { \ + if (ret.len < pline.size \ + && strchr("?#", pline.data[ret.len]) != NULL) { \ + ret.data.cmp.ccs = \ + (CaseCompareStrategy)pline.data[ret.len]; \ + ret.len++; \ + } else { \ + ret.data.cmp.ccs = kCCStrategyUseOption; \ + } \ + } while (0) + switch (schar) { + // Paired brackets. +#define BRACKET(typ, opning, clsing) \ + case opning: \ + case clsing: { \ + ret.type = typ; \ + ret.data.brc.closing = (schar == clsing); \ + break; \ + } + BRACKET(kExprLexParenthesis, '(', ')') + BRACKET(kExprLexBracket, '[', ']') + BRACKET(kExprLexFigureBrace, '{', '}') +#undef BRACKET + + // Single character tokens without data. +#define CHAR(typ, ch) \ + case ch: { \ + ret.type = typ; \ + break; \ + } + CHAR(kExprLexQuestion, '?') + CHAR(kExprLexColon, ':') + CHAR(kExprLexDot, '.') + CHAR(kExprLexPlus, '+') + CHAR(kExprLexComma, ',') +#undef CHAR + + // Multiplication/division/modulo. +#define MUL(mul_type, ch) \ + case ch: { \ + ret.type = kExprLexMultiplication; \ + ret.data.mul.type = mul_type; \ + break; \ + } + MUL(kExprLexMulMul, '*') + MUL(kExprLexMulDiv, '/') + MUL(kExprLexMulMod, '%') +#undef MUL + +#define CHARREG(typ, cond) \ + do { \ + ret.type = typ; \ + for (; (ret.len < pline.size \ + && cond(pline.data[ret.len])) \ + ; ret.len++) { \ + } \ + } while (0) + + // Whitespace. 
+ case ' ': + case TAB: { + CHARREG(kExprLexSpacing, ascii_iswhite); + break; + } + + // Control character, except for NUL, NL and TAB. + case Ctrl_A: case Ctrl_B: case Ctrl_C: case Ctrl_D: case Ctrl_E: + case Ctrl_F: case Ctrl_G: case Ctrl_H: + + case Ctrl_K: case Ctrl_L: case Ctrl_M: case Ctrl_N: case Ctrl_O: + case Ctrl_P: case Ctrl_Q: case Ctrl_R: case Ctrl_S: case Ctrl_T: + case Ctrl_U: case Ctrl_V: case Ctrl_W: case Ctrl_X: case Ctrl_Y: + case Ctrl_Z: { +#define ISCTRL(schar) (schar < ' ') + CHARREG(kExprLexInvalid, ISCTRL); + ret.data.err.type = kExprLexSpacing; + ret.data.err.msg = + _("E15: Invalid control character present in input: %.*s"); + break; +#undef ISCTRL + } + + // Number. + // Note: determining whether dot is (not) a part of a float needs more + // context, so lexer does not do this. + // FIXME: Resolve ambiguity by additional argument. + case '0': case '1': case '2': case '3': case '4': case '5': case '6': + case '7': case '8': case '9': { + CHARREG(kExprLexNumber, ascii_isdigit); + break; + } + + // Environment variable. + case '$': { + CHARREG(kExprLexEnv, vim_isIDc); + break; + } + + // Normal variable/function name. + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': + case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': + case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': + case 'v': case 'w': case 'x': case 'y': case 'z': + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': + case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': + case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': + case 'V': case 'W': case 'X': case 'Y': case 'Z': + case '_': { +#define ISWORD_OR_AUTOLOAD(x) \ + (ASCII_ISALNUM(x) || (x) == AUTOLOAD_CHAR || (x) == '_') +#define ISWORD(x) \ + (ASCII_ISALNUM(x) || (x) == '_') + ret.data.var.scope = 0; + ret.data.var.autoload = false; + CHARREG(kExprLexPlainIdentifier, ISWORD); + // "is" and "isnot" operators. 
+ if ((ret.len == 2 && memcmp(pline.data, "is", 2) == 0) + || (ret.len == 5 && memcmp(pline.data, "isnot", 5) == 0)) { + ret.type = kExprLexComparison; + ret.data.cmp.type = kExprLexCmpIdentical; + ret.data.cmp.inv = (ret.len == 5); + GET_CCS(ret, pline); + // Scope: `s:`, etc. + } else if (ret.len == 1 + && pline.size > 1 + && strchr("sgvbwtla", schar) != NULL + && pline.data[ret.len] == ':') { + ret.len++; + ret.data.var.scope = schar; + CHARREG(kExprLexPlainIdentifier, ISWORD_OR_AUTOLOAD); + ret.data.var.autoload = ( + memchr(pline.data + 2, AUTOLOAD_CHAR, ret.len - 2) + != NULL); + // Previous CHARREG stopped at autoload character in order to make it + // possible to detect `is#`. Continue now with autoload characters + // included. + // + // Warning: there is ambiguity for the lexer: `is#Foo(1)` is a call of + // function `is#Foo()`, `1is#Foo(1)` is a comparison `1 is# Foo(1)`. This + // needs to be resolved on the higher level where context is available. + } else if (pline.size > ret.len + && pline.data[ret.len] == AUTOLOAD_CHAR) { + ret.data.var.autoload = true; + CHARREG(kExprLexPlainIdentifier, ISWORD_OR_AUTOLOAD); + } + break; +#undef ISWORD_OR_AUTOLOAD +#undef ISWORD + } +#undef CHARREG + + // Option. + case '&': { +#define OPTNAMEMISS(ret) \ + do { \ + ret.type = kExprLexInvalid; \ + ret.data.err.type = kExprLexOption; \ + ret.data.err.msg = _("E112: Option name missing: %.*s"); \ + } while (0) + if (pline.size > 1 && pline.data[1] == '&') { + ret.type = kExprLexAnd; + ret.len++; + break; + } + if (pline.size == 1 || !ASCII_ISALPHA(pline.data[1])) { + OPTNAMEMISS(ret); + break; + } + ret.type = kExprLexOption; + if (pline.size > 2 + && pline.data[2] == ':' + && strchr("gl", pline.data[1]) != NULL) { + ret.len += 2; + ret.data.opt.scope = (pline.data[1] == 'g' + ? 
kExprLexOptGlobal + : kExprLexOptLocal); + ret.data.opt.name = pline.data + 3; + } else { + ret.data.opt.scope = kExprLexOptUnspecified; + ret.data.opt.name = pline.data + 1; + } + const char *p = ret.data.opt.name; + const char *const e = pline.data + pline.size; + if (e - p >= 4 && p[0] == 't' && p[1] == '_') { + ret.data.opt.len = 4; + ret.len += 4; + } else { + for (; p < e && ASCII_ISALPHA(*p); p++) { + } + ret.data.opt.len = (size_t)(p - ret.data.opt.name); + if (ret.data.opt.len == 0) { + OPTNAMEMISS(ret); + } else { + ret.len += ret.data.opt.len; + } + } + break; +#undef OPTNAMEMISS + } + + // Register. + case '@': { + ret.type = kExprLexRegister; + if (pline.size > 1) { + ret.len++; + ret.data.reg.name = (uint8_t)pline.data[1]; + } else { + ret.data.reg.name = -1; + } + break; + } + + // Single quoted string. + case '\'': { + ret.type = kExprLexSingleQuotedString; + ret.data.str.closed = false; + for (; ret.len < pline.size && !ret.data.str.closed; ret.len++) { + if (pline.data[ret.len] == '\'') { + if (ret.len + 1 < pline.size && pline.data[ret.len + 1] == '\'') { + ret.len++; + } else { + ret.data.str.closed = true; + } + } + } + break; + } + + // Double quoted string. + case '"': { + ret.type = kExprLexDoubleQuotedString; + ret.data.str.closed = false; + for (; ret.len < pline.size && !ret.data.str.closed; ret.len++) { + if (pline.data[ret.len] == '\\') { + if (ret.len + 1 < pline.size) { + ret.len++; + } + } else if (pline.data[ret.len] == '"') { + ret.data.str.closed = true; + } + } + break; + } + + // Unary not, (un)equality and regex (not) match comparison operators. + case '!': + case '=': { + if (pline.size == 1) { +viml_pexpr_next_token_invalid_comparison: + ret.type = (schar == '!' ? 
kExprLexNot : kExprLexInvalid); + if (ret.type == kExprLexInvalid) { + ret.data.err.msg = _("E15: Expected == or =~: %.*s"); + ret.data.err.type = kExprLexComparison; + } + break; + } + ret.type = kExprLexComparison; + ret.data.cmp.inv = (schar == '!'); + if (pline.data[1] == '=') { + ret.data.cmp.type = kExprLexCmpEqual; + ret.len++; + } else if (pline.data[1] == '~') { + ret.data.cmp.type = kExprLexCmpMatches; + ret.len++; + } else { + goto viml_pexpr_next_token_invalid_comparison; + } + GET_CCS(ret, pline); + break; + } + + // Less/greater [or equal to] comparison operators. + case '>': + case '<': { + ret.type = kExprLexComparison; + const bool haseqsign = (pline.size > 1 && pline.data[1] == '='); + if (haseqsign) { + ret.len++; + } + GET_CCS(ret, pline); + ret.data.cmp.inv = (schar == '<'); + ret.data.cmp.type = ((ret.data.cmp.inv ^ haseqsign) + ? kExprLexCmpGreaterOrEqual + : kExprLexCmpGreater); + break; + } + + // Minus sign or arrow from lambdas. + case '-': { + if (pline.size > 1 && pline.data[1] == '>') { + ret.len++; + ret.type = kExprLexArrow; + } else { + ret.type = kExprLexMinus; + } + break; + } + + // Expression end because Ex command ended. + case NUL: + case NL: { + ret.type = kExprLexEOC; + break; + } + + // Everything else is not valid. 
+ default: { + ret.len = (size_t)utfc_ptr2len_len((const char_u *)pline.data, + (int)pline.size); + ret.type = kExprLexInvalid; + ret.data.err.type = kExprLexPlainIdentifier; + ret.data.err.msg = _("E15: Unidentified character: %.*s"); + break; + } + } +#undef GET_CCS +viml_pexpr_next_token_adv_return: + viml_parser_advance(pstate, ret.len); + return ret; +} diff --git a/src/nvim/viml/parser/expressions.h b/src/nvim/viml/parser/expressions.h new file mode 100644 index 0000000000..52354760a5 --- /dev/null +++ b/src/nvim/viml/parser/expressions.h @@ -0,0 +1,118 @@ +#ifndef NVIM_VIML_PARSER_EXPRESSIONS_H +#define NVIM_VIML_PARSER_EXPRESSIONS_H + +#include <stddef.h> +#include <stdbool.h> + +#include "nvim/types.h" +#include "nvim/viml/parser/parser.h" + +// Defines whether to ignore case: +// == kCCStrategyUseOption +// ==# kCCStrategyMatchCase +// ==? kCCStrategyIgnoreCase +typedef enum { + kCCStrategyUseOption = 0, // 0 for xcalloc + kCCStrategyMatchCase = '#', + kCCStrategyIgnoreCase = '?', +} CaseCompareStrategy; + +/// Lexer token type +typedef enum { + kExprLexInvalid = 0, ///< Invalid token, indicating an error. + kExprLexMissing, ///< Missing token, for use in parser. + kExprLexSpacing, ///< Spaces, tabs, newlines, etc. + kExprLexEOC, ///< End of command character: NL, |, just end of stream. + + kExprLexQuestion, ///< Question mark, for use in ternary. + kExprLexColon, ///< Colon, for use in ternary. + kExprLexOr, ///< Logical or operator. + kExprLexAnd, ///< Logical and operator. + kExprLexComparison, ///< One of the comparison operators. + kExprLexPlus, ///< Plus sign. + kExprLexMinus, ///< Minus sign. + kExprLexDot, ///< Dot: either concat or subscript, also part of the float. + kExprLexMultiplication, ///< Multiplication, division or modulo operator. + + kExprLexNot, ///< Not: !. + + kExprLexNumber, ///< Integer number literal, or part of a float. + kExprLexSingleQuotedString, ///< Single quoted string literal. + kExprLexDoubleQuotedString, ///< Double quoted string literal. 
+ kExprLexOption, ///< &optionname option value. + kExprLexRegister, ///< @r register value. + kExprLexEnv, ///< Environment $variable value. + kExprLexPlainIdentifier, ///< Identifier, optionally scoped: `abc`, `foo#bar`, `s:abc`. + + kExprLexBracket, ///< Bracket, either opening or closing. + kExprLexFigureBrace, ///< Figure brace, either opening or closing. + kExprLexParenthesis, ///< Parenthesis, either opening or closing. + kExprLexComma, ///< Comma. + kExprLexArrow, ///< Arrow, like from lambda expressions. +} LexExprTokenType; + +/// Lexer token +typedef struct { + ParserPosition start; + size_t len; + LexExprTokenType type; + union { + struct { + enum { + kExprLexCmpEqual, ///< Equality, inequality. + kExprLexCmpMatches, ///< Matches regex, does not match regex. + kExprLexCmpGreater, ///< `>` or `<=` + kExprLexCmpGreaterOrEqual, ///< `>=` or `<`. + kExprLexCmpIdentical, ///< `is` or `isnot` + } type; ///< Comparison type. + CaseCompareStrategy ccs; ///< Case comparison strategy. + bool inv; ///< True if comparison is to be inverted. + } cmp; ///< For kExprLexComparison. + + struct { + enum { + kExprLexMulMul, ///< Real multiplication. + kExprLexMulDiv, ///< Division. + kExprLexMulMod, ///< Modulo. + } type; ///< Multiplication type. + } mul; ///< For kExprLexMultiplication. + + struct { + bool closing; ///< True if bracket/etc is a closing one. + } brc; ///< For brackets/braces/parentheses. + + struct { + int name; ///< Register name, may be -1 if name not present. + } reg; ///< For kExprLexRegister. + + struct { + bool closed; ///< True if quote was closed. + } str; ///< For kExprLexSingleQuotedString and kExprLexDoubleQuotedString. + + struct { + const char *name; ///< Option name start. + size_t len; ///< Option name length. + enum { + kExprLexOptUnspecified = 0, + kExprLexOptGlobal = 1, + kExprLexOptLocal = 2, + } scope; ///< Option scope: &l:, &g: or not specified. + } opt; ///< Option properties. + + struct { + int scope; ///< Scope character or 0 if not present. 
+ bool autoload; ///< Has autoload characters. + } var; ///< For kExprLexPlainIdentifier + + struct { + LexExprTokenType type; ///< Suggested type for parsing incorrect code. + const char *msg; ///< Error message. + } err; ///< For kExprLexInvalid + } data; ///< Additional data, if needed. +} LexExprToken; + +#ifdef INCLUDE_GENERATED_DECLARATIONS +# include "viml/parser/expressions.h.generated.h" +#endif + +#endif // NVIM_VIML_PARSER_EXPRESSIONS_H diff --git a/src/nvim/viml/parser/parser.h b/src/nvim/viml/parser/parser.h new file mode 100644 index 0000000000..ec582294e1 --- /dev/null +++ b/src/nvim/viml/parser/parser.h @@ -0,0 +1,129 @@ +#ifndef NVIM_VIML_PARSER_PARSER_H +#define NVIM_VIML_PARSER_PARSER_H + +#include <stddef.h> +#include <stdbool.h> +#include <assert.h> + +#include "nvim/lib/kvec.h" +#include "nvim/func_attr.h" + +/// One parsed line +typedef struct { + const char *data; ///< Parsed line pointer + size_t size; ///< Parsed line size +} ParserLine; + +/// Line getter type for parser +/// +/// Line getter must return {NULL, 0} for EOF. +typedef void (*ParserLineGetter)(void *cookie, ParserLine *ret_pline); + +/// Parser position in the input +typedef struct { + size_t line; ///< Line index in ParserInputReader.lines. + size_t col; ///< Byte index in the line. +} ParserPosition; + +/// Parser state item. +typedef struct { + enum { + kPTopStateParsingCommand = 0, + kPTopStateParsingExpression, + } type; + union { + struct { + enum { + kExprUnknown = 0, + } type; + } expr; + } data; +} ParserStateItem; + +/// Structure defining input reader +typedef struct { + /// Function used to get next line. + ParserLineGetter get_line; + /// Data for get_line function. + void *cookie; + /// All lines obtained by get_line. + kvec_withinit_t(ParserLine, 4) lines; +} ParserInputReader; + +/// Highlighted region definition +/// +/// Note: one chunk may highlight only one line. +typedef struct { + ParserPosition start; ///< Start of the highlight: line and column. 
+ size_t end_col; ///< End column, points to the start of the next character. + const char *group; ///< Highlight group. +} ParserHighlightChunk; + +/// Highlighting defined by a parser +typedef kvec_withinit_t(ParserHighlightChunk, 16) ParserHighlight; + +/// Structure defining parser state +typedef struct { + /// Line reader. + ParserInputReader reader; + /// Position up to which input was parsed. + ParserPosition pos; + /// Parser state stack. + kvec_withinit_t(ParserStateItem, 16) stack; + /// Highlighting support. + ParserHighlight *colors; + /// True if line continuation can be used. + bool can_continuate; +} ParserState; + +static inline bool viml_parser_get_remaining_line(ParserState *const pstate, + ParserLine *const ret_pline) + REAL_FATTR_ALWAYS_INLINE REAL_FATTR_WARN_UNUSED_RESULT REAL_FATTR_NONNULL_ALL; + +/// Get currently parsed line, shifted to pstate->pos.col +/// +/// @param pstate Parser state to operate on. +/// +/// @return True if there is a line, false in case of EOF. +static inline bool viml_parser_get_remaining_line(ParserState *const pstate, + ParserLine *const ret_pline) +{ + const size_t num_lines = kv_size(pstate->reader.lines); + if (pstate->pos.line == num_lines) { + pstate->reader.get_line(pstate->reader.cookie, ret_pline); + kvi_push(pstate->reader.lines, *ret_pline); + } else { + *ret_pline = kv_last(pstate->reader.lines); + } + assert(pstate->pos.line == kv_size(pstate->reader.lines) - 1); + if (ret_pline->data != NULL) { + ret_pline->data += pstate->pos.col; + ret_pline->size -= pstate->pos.col; + } + return ret_pline->data != NULL; +} + +static inline void viml_parser_advance(ParserState *const pstate, + const size_t len) + REAL_FATTR_ALWAYS_INLINE REAL_FATTR_NONNULL_ALL; + +/// Advance position by a given number of bytes +/// +/// At maximum advances to the next line. +/// +/// @param pstate Parser state to advance. +/// @param[in] len Number of bytes to advance. 
+static inline void viml_parser_advance(ParserState *const pstate, + const size_t len) +{ + assert(pstate->pos.line == kv_size(pstate->reader.lines) - 1); + const ParserLine pline = kv_last(pstate->reader.lines); + if (pstate->pos.col + len >= pline.size) { + pstate->pos.line++; + pstate->pos.col = 0; + } else { + pstate->pos.col += len; + } +} + +#endif // NVIM_VIML_PARSER_PARSER_H -- cgit From 2d8b9937deae3731143f4ea44e5c41715fe1363a Mon Sep 17 00:00:00 2001 From: ZyX Date: Sun, 20 Aug 2017 20:40:59 +0300 Subject: viml/parser: Handle encoding conversions --- src/nvim/viml/parser/expressions.c | 13 ++++++++++--- src/nvim/viml/parser/parser.h | 34 ++++++++++++++++++++++++++++++++-- 2 files changed, 42 insertions(+), 5 deletions(-) (limited to 'src/nvim/viml/parser') diff --git a/src/nvim/viml/parser/expressions.c b/src/nvim/viml/parser/expressions.c index 0164de3a14..c29fac9cb4 100644 --- a/src/nvim/viml/parser/expressions.c +++ b/src/nvim/viml/parser/expressions.c @@ -25,8 +25,13 @@ #define AUTOLOAD_CHAR '#' /// Get next token for the VimL expression input -LexExprToken viml_pexpr_next_token(ParserState *const pstate) - FUNC_ATTR_WARN_UNUSED_RESULT +/// +/// @param pstate Parser state. +/// @param[in] peek If true, do not advance pstate cursor. +/// +/// @return Next token. 
+LexExprToken viml_pexpr_next_token(ParserState *const pstate, const bool peek) + FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL { LexExprToken ret = { .type = kExprLexInvalid, @@ -362,6 +367,8 @@ viml_pexpr_next_token_invalid_comparison: } #undef GET_CCS viml_pexpr_next_token_adv_return: - viml_parser_advance(pstate, ret.len); + if (!peek) { + viml_parser_advance(pstate, ret.len); + } return ret; } diff --git a/src/nvim/viml/parser/parser.h b/src/nvim/viml/parser/parser.h index ec582294e1..231e43b4c7 100644 --- a/src/nvim/viml/parser/parser.h +++ b/src/nvim/viml/parser/parser.h @@ -7,11 +7,13 @@ #include "nvim/lib/kvec.h" #include "nvim/func_attr.h" +#include "nvim/mbyte.h" /// One parsed line typedef struct { const char *data; ///< Parsed line pointer size_t size; ///< Parsed line size + bool allocated; ///< True if line may be freed. } ParserLine; /// Line getter type for parser @@ -48,6 +50,8 @@ typedef struct { void *cookie; /// All lines obtained by get_line. kvec_withinit_t(ParserLine, 4) lines; + /// Conversion, for :scriptencoding. 
+ vimconv_T conv; } ParserInputReader; /// Highlighted region definition @@ -76,6 +80,33 @@ typedef struct { bool can_continuate; } ParserState; +static inline void viml_preader_get_line(ParserInputReader *const preader, + ParserLine *const ret_pline) + REAL_FATTR_NONNULL_ALL; + +/// Get one line from ParserInputReader +static inline void viml_preader_get_line(ParserInputReader *const preader, + ParserLine *const ret_pline) +{ + ParserLine pline; + preader->get_line(preader->cookie, &pline); + if (preader->conv.vc_type != CONV_NONE && pline.size) { + ParserLine cpline = { + .allocated = true, + .size = pline.size, + }; + cpline.data = (char *)string_convert(&preader->conv, + (char_u *)pline.data, + &cpline.size); + if (pline.allocated) { + xfree((void *)pline.data); + } + pline = cpline; + } + kvi_push(preader->lines, pline); + *ret_pline = pline; +} + static inline bool viml_parser_get_remaining_line(ParserState *const pstate, ParserLine *const ret_pline) REAL_FATTR_ALWAYS_INLINE REAL_FATTR_WARN_UNUSED_RESULT REAL_FATTR_NONNULL_ALL; @@ -90,8 +121,7 @@ static inline bool viml_parser_get_remaining_line(ParserState *const pstate, { const size_t num_lines = kv_size(pstate->reader.lines); if (pstate->pos.line == num_lines) { - pstate->reader.get_line(pstate->reader.cookie, ret_pline); - kvi_push(pstate->reader.lines, *ret_pline); + viml_preader_get_line(&pstate->reader, ret_pline); } else { *ret_pline = kv_last(pstate->reader.lines); } -- cgit From 1265da028882d9877a5ebbd3f3f52cb4b52a4b94 Mon Sep 17 00:00:00 2001 From: ZyX Date: Sun, 3 Sep 2017 19:53:41 +0300 Subject: viml/parser: Add helper functions for highlighting --- src/nvim/viml/parser/parser.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'src/nvim/viml/parser') diff --git a/src/nvim/viml/parser/parser.h b/src/nvim/viml/parser/parser.h index 231e43b4c7..a17edac403 100644 --- a/src/nvim/viml/parser/parser.h +++ b/src/nvim/viml/parser/parser.h @@ -156,4 +156,33 @@ static inline 
void viml_parser_advance(ParserState *const pstate, } } +static inline void viml_parser_highlight(ParserState *const pstate, + const ParserPosition start, + const size_t end_col, + const char *const group) + REAL_FATTR_ALWAYS_INLINE REAL_FATTR_NONNULL_ALL; + +/// Record highlighting of some region of text +/// +/// @param pstate Parser state to work with. +/// @param[in] start Start position of the highlight. +/// @param[in] len Highlighting chunk length. +/// @param[in] group Highlight group. +static inline void viml_parser_highlight(ParserState *const pstate, + const ParserPosition start, + const size_t len, + const char *const group) +{ + if (pstate->colors == NULL) { + return; + } + // TODO(ZyX-I): May do some assert() sanitizing here. + // TODO(ZyX-I): May join chunks. + kvi_push(*pstate->colors, ((ParserHighlightChunk) { + .start = start, + .end_col = start.col + len, + .group = group, + })); +} + #endif // NVIM_VIML_PARSER_PARSER_H -- cgit From 430e516d3ac1235c1ee3009a8a36089bf278440e Mon Sep 17 00:00:00 2001 From: ZyX Date: Sun, 3 Sep 2017 21:58:16 +0300 Subject: viml/parser/expressions: Start creating expressions parser Currently supported nodes: - Register as it is one of the simplest value nodes (even numbers are not that simple with that dot handling). - Plus, both unary and binary. - Parenthesis, both nesting and calling. Note regarding unit tests: it stores data for AST in highlighting in strings in place of tables because luassert fails to do a good job at representing big tables. Squashing a bunch of data into a single string simply yields more readable result. 
--- src/nvim/viml/parser/expressions.c | 625 +++++++++++++++++++++++++++++++++++++ src/nvim/viml/parser/expressions.h | 74 +++++ 2 files changed, 699 insertions(+) (limited to 'src/nvim/viml/parser') diff --git a/src/nvim/viml/parser/expressions.c b/src/nvim/viml/parser/expressions.c index c29fac9cb4..b54f2eb237 100644 --- a/src/nvim/viml/parser/expressions.c +++ b/src/nvim/viml/parser/expressions.c @@ -13,10 +13,18 @@ #include "nvim/types.h" #include "nvim/charset.h" #include "nvim/ascii.h" +#include "nvim/lib/kvec.h" #include "nvim/viml/parser/expressions.h" #include "nvim/viml/parser/parser.h" +typedef kvec_withinit_t(ExprASTNode **, 16) ExprASTStack; + +typedef enum { + kELvlOperator, ///< Operators: function call, subscripts, binary operators, … + kELvlValue, ///< Actual value: literals, variables, nested expressions. +} ExprASTLevel; + #ifdef INCLUDE_GENERATED_DECLARATIONS # include "viml/parser/expressions.c.generated.h" #endif @@ -144,6 +152,7 @@ LexExprToken viml_pexpr_next_token(ParserState *const pstate, const bool peek) // Environment variable. case '$': { + // FIXME: Parser function can’t be thread-safe with vim_isIDc. CHARREG(kExprLexEnv, vim_isIDc); break; } @@ -183,6 +192,7 @@ LexExprToken viml_pexpr_next_token(ParserState *const pstate, const bool peek) ret.data.var.autoload = ( memchr(pline.data + 2, AUTOLOAD_CHAR, ret.len - 2) != NULL); + // FIXME: Resolve ambiguity with an argument to the lexer function. // Previous CHARREG stopped at autoload character in order to make it // possible to detect `is#`. Continue now with autoload characters // included. @@ -372,3 +382,618 @@ viml_pexpr_next_token_adv_return: } return ret; } + +// start = s ternary_expr s EOC +// ternary_expr = binop_expr +// ( s Question s ternary_expr s Colon s ternary_expr s )? +// binop_expr = unaryop_expr ( binop unaryop_expr )? +// unaryop_expr = ( unaryop )? 
subscript_expr +// subscript_expr = subscript_expr subscript +// | value_expr +// subscript = Bracket('[') s ternary_expr s Bracket(']') +// | s Parenthesis('(') call_args Parenthesis(')') +// | Dot ( PlainIdentifier | Number )+ +// # Note: `s` before Parenthesis('(') is only valid if preceding subscript_expr +// # is PlainIdentifier +// value_expr = ( float | Number +// | DoubleQuotedString | SingleQuotedString +// | paren_expr +// | list_literal +// | lambda_literal +// | dict_literal +// | Environment +// | Option +// | Register +// | var ) +// float = Number Dot Number ( PlainIdentifier('e') ( Plus | Minus )? Number )? +// # Note: `1.2.3` is concat and not float. `"abc".2.3` is also concat without +// # floats. +// paren_expr = Parenthesis('(') s ternary_expr s Parenthesis(')') +// list_literal = Bracket('[') s +// ( ternary_expr s Comma s )* +// ternary_expr? s +// Bracket(']') +// dict_literal = FigureBrace('{') s +// ( ternary_expr s Colon s ternary_expr s Comma s )* +// ( ternary_expr s Colon s ternary_expr s )? +// FigureBrace('}') +// lambda_literal = FigureBrace('{') s +// ( PlainIdentifier s Comma s )* +// PlainIdentifier s +// Arrow s +// ternary_expr s +// FigureBrace('}') +// var = varchunk+ +// varchunk = PlainIdentifier +// | Comparison("is" | "is#" | "isnot" | "isnot#") +// | FigureBrace('{') s ternary_expr s FigureBrace('}') +// call_args = ( s ternary_expr s Comma s )* s ternary_expr? s +// binop = s ( Plus | Minus | Dot +// | Comparison +// | Multiplication +// | Or +// | And ) s +// unaryop = s ( Not | Plus | Minus ) s +// s = Spacing? +// +// Binary operator precedence and associativity: +// +// Operator | Precedence | Associativity +// ---------+------------+----------------- +// || | 2 | left +// && | 3 | left +// cmp* | 4 | not associative +// + - . | 5 | left +// * / % | 6 | left +// +// * comparison operators: +// +// == ==# ==? != !=# !=? +// =~ =~# =~? !~ !~# !~? +// > ># >? <= <=# <=? +// < <# <? >= >=# >=? +// is is# is? 
isnot isnot# isnot? +// +// Used highlighting groups and assumed linkage: +// +// NVimInvalid -> Error +// NVimInvalidValue -> NVimInvalid +// NVimInvalidOperator -> NVimInvalid +// NVimInvalidDelimiter -> NVimInvalid +// +// NVimOperator -> Operator +// NVimUnaryOperator -> NVimOperator +// NVimBinaryOperator -> NVimOperator +// NVimComparisonOperator -> NVimOperator +// NVimTernaryOperator -> NVimOperator +// +// NVimParenthesis -> Delimiter +// +// NVimInvalidSpacing -> NVimInvalid +// NVimInvalidTernaryOperator -> NVimInvalidOperator +// NVimInvalidRegister -> NVimInvalidValue +// NVimInvalidClosingBracket -> NVimInvalidDelimiter +// NVimInvalidSpacing -> NVimInvalid +// +// NVimUnaryPlus -> NVimUnaryOperator +// NVimBinaryPlus -> NVimBinaryOperator +// NVimRegister -> SpecialChar +// NVimNestingParenthesis -> NVimParenthesis +// NVimCallingParenthesis -> NVimParenthesis + +/// Allocate a new node and set some of the values +/// +/// @param[in] type Node type to allocate. +/// @return Allocated node with type set and children/next set to NULL. +static inline ExprASTNode *viml_pexpr_new_node(const ExprASTNodeType type) + FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_MALLOC +{ + ExprASTNode *ret = xmalloc(sizeof(*ret)); + ret->type = type; + ret->children = NULL; + ret->next = NULL; + return ret; +} + +typedef enum { + kEOpLvlInvalid = 0, + kEOpLvlParens, + kEOpLvlTernary, + kEOpLvlOr, + kEOpLvlAnd, + kEOpLvlComparison, + kEOpLvlAddition, ///< Addition, subtraction and concatenation. + kEOpLvlMultiplication, ///< Multiplication, division and modulo. + kEOpLvlUnary, ///< Unary operations: not, minus, plus. + kEOpLvlSubscript, ///< Subscripts. + kEOpLvlValue, ///< Values: literals, variables, nested expressions, … +} ExprOpLvl; + +typedef enum { + kEOpAssNo= 'n', ///< Not associative / not applicable. + kEOpAssLeft = 'l', ///< Left associativity. + kEOpAssRight = 'r', ///< Right associativity. 
+} ExprOpAssociativity; + +static const ExprOpLvl node_type_to_op_lvl[] = { + [kExprNodeMissing] = kEOpLvlInvalid, + [kExprNodeOpMissing] = kEOpLvlMultiplication, + + [kExprNodeNested] = kEOpLvlParens, + [kExprNodeComplexIdentifier] = kEOpLvlParens, + + [kExprNodeTernary] = kEOpLvlTernary, + + [kExprNodeBinaryPlus] = kEOpLvlAddition, + + [kExprNodeUnaryPlus] = kEOpLvlUnary, + + [kExprNodeSubscript] = kEOpLvlSubscript, + [kExprNodeCall] = kEOpLvlSubscript, + + [kExprNodeRegister] = kEOpLvlValue, + [kExprNodeListLiteral] = kEOpLvlValue, + [kExprNodePlainIdentifier] = kEOpLvlValue, +}; + +static const ExprOpAssociativity node_type_to_op_ass[] = { + [kExprNodeMissing] = kEOpAssNo, + [kExprNodeOpMissing] = kEOpAssNo, + + [kExprNodeNested] = kEOpAssNo, + [kExprNodeComplexIdentifier] = kEOpAssLeft, + + [kExprNodeTernary] = kEOpAssNo, + + [kExprNodeBinaryPlus] = kEOpAssLeft, + + [kExprNodeUnaryPlus] = kEOpAssNo, + + [kExprNodeSubscript] = kEOpAssLeft, + [kExprNodeCall] = kEOpAssLeft, + + [kExprNodeRegister] = kEOpAssNo, + [kExprNodeListLiteral] = kEOpAssNo, + [kExprNodePlainIdentifier] = kEOpAssNo, +}; + +#ifdef UNIT_TESTING +#include <stdio.h> +REAL_FATTR_UNUSED +static inline void viml_pexpr_debug_print_ast_stack( + const ExprASTStack *const ast_stack, + const char *const msg) + FUNC_ATTR_NONNULL_ALL FUNC_ATTR_ALWAYS_INLINE +{ + fprintf(stderr, "\n%sstack: %zu:\n", msg, kv_size(*ast_stack)); + for (size_t i = 0; i < kv_size(*ast_stack); i++) { + const ExprASTNode *const *const eastnode_p = ( + (const ExprASTNode *const *)kv_A(*ast_stack, i)); + if (*eastnode_p == NULL) { + fprintf(stderr, "- %p : NULL\n", (void *)eastnode_p); + } else { + fprintf(stderr, "- %p : %p : %c : %zu:%zu:%zu\n", + (void *)eastnode_p, (void *)(*eastnode_p), (*eastnode_p)->type, + (*eastnode_p)->start.line, (*eastnode_p)->start.col, + (*eastnode_p)->len); + } + } +} +#define PSTACK(msg) \ + viml_pexpr_debug_print_ast_stack(&ast_stack, #msg) +#define PSTACK_P(msg) \ +
viml_pexpr_debug_print_ast_stack(ast_stack, #msg) +#endif + +/// Handle binary operator +/// +/// This function is responsible for handling priority levels as well. +static void viml_pexpr_handle_bop(ExprASTStack *const ast_stack, + ExprASTNode *const bop_node, + ExprASTLevel *const want_level_p) + FUNC_ATTR_NONNULL_ALL +{ + ExprASTNode **top_node_p = NULL; + ExprASTNode *top_node; + ExprOpLvl top_node_lvl; + ExprOpAssociativity top_node_ass; + assert(kv_size(*ast_stack)); + const ExprOpLvl bop_node_lvl = node_type_to_op_lvl[bop_node->type]; + do { + ExprASTNode **new_top_node_p = kv_last(*ast_stack); + ExprASTNode *new_top_node = *new_top_node_p; + assert(new_top_node != NULL); + const ExprOpLvl new_top_node_lvl = node_type_to_op_lvl[new_top_node->type]; + const ExprOpAssociativity new_top_node_ass = ( + node_type_to_op_ass[new_top_node->type]); + if (top_node_p != NULL + && ((bop_node_lvl > new_top_node_lvl + || (bop_node_lvl == new_top_node_lvl + && new_top_node_ass == kEOpAssNo)))) { + break; + } + kv_drop(*ast_stack, 1); + top_node_p = new_top_node_p; + top_node = new_top_node; + top_node_lvl = new_top_node_lvl; + top_node_ass = new_top_node_ass; + } while (kv_size(*ast_stack)); + // FIXME Handle right and no associativity correctly + *top_node_p = bop_node; + bop_node->children = top_node; + assert(bop_node->children->next == NULL); + kvi_push(*ast_stack, top_node_p); + kvi_push(*ast_stack, &bop_node->children->next); + *want_level_p = kELvlValue; +} + +/// Get highlight group name +#define HL(g) (is_invalid ? "NVimInvalid" #g : "NVim" #g) + +/// Highlight current token with the given group +#define HL_CUR_TOKEN(g) \ + viml_parser_highlight(pstate, cur_token.start, cur_token.len, \ + HL(g)) + +/// Allocate new node, saving some values +#define NEW_NODE(type) \ + viml_pexpr_new_node(type) + +/// Set position of the given node to position from the given token +/// +/// @param cur_node Node to modify. +/// @param cur_token Token to set position from. 
+#define POS_FROM_TOKEN(cur_node, cur_token) \ + do { \ + cur_node->start = cur_token.start; \ + cur_node->len = cur_token.len; \ + } while (0) + +/// Allocate new node and set its position from the current token +/// +/// If previous token happened to contain spacing then it will be included. +/// +/// @param cur_node Variable to save allocated node to. +/// @param typ Node type. +#define NEW_NODE_WITH_CUR_POS(cur_node, typ) \ + do { \ + cur_node = NEW_NODE(typ); \ + POS_FROM_TOKEN(cur_node, cur_token); \ + if (prev_token.type == kExprLexSpacing) { \ + cur_node->start = prev_token.start; \ + cur_node->len += prev_token.len; \ + } \ + } while (0) + +// TODO(ZyX-I): actual condition +/// Check whether it is possible to have next expression after current +/// +/// For :echo: `:echo @a @a` is a valid expression. `:echo (@a @a)` is not. +#define MAY_HAVE_NEXT_EXPR \ + (kv_size(ast_stack) == 1) + +/// Record missing operator: for things like +/// +/// :echo @a @a +/// +/// (allowed) or +/// +/// :echo (@a @a) +/// +/// (parsed as OpMissing(@a, @a)). +#define OP_MISSING \ + do { \ + if (flags & kExprFlagsMulti && MAY_HAVE_NEXT_EXPR) { \ + /* Multiple expressions allowed, return without calling */ \ + /* viml_parser_advance(). */ \ + goto viml_pexpr_parse_end; \ + } else { \ + assert(*top_node_p != NULL); \ + ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E15: Missing operator: %.*s")); \ + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeOpMissing); \ + cur_node->len = 0; \ + viml_pexpr_handle_bop(&ast_stack, cur_node, &want_level); \ + is_invalid = true; \ + goto viml_pexpr_parse_process_token; \ + } \ + } while (0) + +/// Set AST error, unless AST already is not correct +/// +/// @param[out] ret_ast AST to set error in. +/// @param[in] pstate Parser state, used to get error message argument. +/// @param[in] msg Error message, assumed to be already translated and +/// containing a single %token "%.*s". +/// @param[in] start Position at which error occurred. 
+static inline void east_set_error(ExprAST *const ret_ast, + const ParserState *const pstate, + const char *const msg, + const ParserPosition start) + FUNC_ATTR_NONNULL_ALL FUNC_ATTR_ALWAYS_INLINE +{ + if (!ret_ast->correct) { + return; + } + const ParserLine pline = pstate->reader.lines.items[start.line]; + ret_ast->correct = false; + ret_ast->err.msg = msg; + ret_ast->err.arg_len = (int)(pline.size - start.col); + ret_ast->err.arg = pline.data + start.col; +} + +/// Set error from the given kExprLexInvalid token and given message +#define ERROR_FROM_TOKEN_AND_MSG(cur_token, msg) \ + east_set_error(&ast, pstate, msg, cur_token.start) + +/// Set error from the given kExprLexInvalid token +#define ERROR_FROM_TOKEN(cur_token) \ + ERROR_FROM_TOKEN_AND_MSG(cur_token, cur_token.data.err.msg) + +/// Parse one VimL expression +/// +/// @param pstate Parser state. +/// @param[in] flags Additional flags, see ExprParserFlags +/// +/// @return Parsed AST. +ExprAST viml_pexpr_parse(ParserState *const pstate, const int flags) + FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL +{ + ExprAST ast = { + .correct = true, + .err = { + .msg = NULL, + .arg_len = 0, + .arg = NULL, + }, + .root = NULL, + }; + ExprASTStack ast_stack; + kvi_init(ast_stack); + kvi_push(ast_stack, &ast.root); + // Expressions stack: + // 1. *last is NULL if want_level is kExprLexValue. Indicates where expression + // is to be put. + // 2. *last is not NULL otherwise, indicates current expression to be used as + // an operator argument. + ExprASTLevel want_level = kELvlValue; + LexExprToken prev_token = { .type = kExprLexMissing }; + bool highlighted_prev_spacing = false; + do { + LexExprToken cur_token = viml_pexpr_next_token(pstate, true); + if (cur_token.type == kExprLexEOC) { + if (flags & kExprFlagsDisallowEOC) { + if (cur_token.len == 0) { + // It is end of string, break. + break; + } else { + // It is NL, NUL or bar. + // + // Note: `=1 | 2` actually yields 1 in Vim without any + // errors. 
This will be changed here. + cur_token.type = kExprLexInvalid; + cur_token.data.err.msg = _("E15: Unexpected EOC character: %.*s"); + const ParserLine pline = ( + pstate->reader.lines.items[cur_token.start.line]); + const char eoc_char = pline.data[cur_token.start.col]; + cur_token.data.err.type = ((eoc_char == NUL || eoc_char == NL) + ? kExprLexSpacing + : kExprLexOr); + } + } else { + break; + } + } + LexExprTokenType tok_type = cur_token.type; + const bool token_invalid = (tok_type == kExprLexInvalid); + bool is_invalid = token_invalid; +viml_pexpr_parse_process_token: + if (tok_type == kExprLexSpacing) { + if (is_invalid) { + viml_parser_highlight(pstate, cur_token.start, cur_token.len, + HL(Spacing)); + } else { + // Do not do anything: let regular spacing be highlighted as normal. + // This also allows later to highlight spacing as invalid. + } + goto viml_pexpr_parse_cycle_end; + } else if (is_invalid && prev_token.type == kExprLexSpacing + && !highlighted_prev_spacing) { + viml_parser_highlight(pstate, prev_token.start, prev_token.len, + HL(Spacing)); + is_invalid = false; + highlighted_prev_spacing = true; + } + ExprASTNode **const top_node_p = kv_last(ast_stack); + ExprASTNode *cur_node = NULL; + // Keep these two asserts separate for debugging purposes. + assert(want_level == kELvlValue || *top_node_p != NULL); + assert(want_level != kELvlValue || *top_node_p == NULL); + switch (tok_type) { + case kExprLexEOC: { + assert(false); + } + case kExprLexInvalid: { + ERROR_FROM_TOKEN(cur_token); + tok_type = cur_token.data.err.type; + goto viml_pexpr_parse_process_token; + } + case kExprLexRegister: { + if (want_level == kELvlValue) { + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeRegister); + cur_node->data.reg.name = cur_token.data.reg.name; + *top_node_p = cur_node; + want_level = kELvlOperator; + viml_parser_highlight(pstate, cur_token.start, cur_token.len, + HL(Register)); + } else { + // Register in operator position: e.g. 
@a @a + OP_MISSING; + } + break; + } + case kExprLexPlus: { + if (want_level == kELvlValue) { + // Value level: assume unary plus + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeUnaryPlus); + *top_node_p = cur_node; + kvi_push(ast_stack, &cur_node->children); + HL_CUR_TOKEN(UnaryPlus); + } else if (want_level < kELvlValue) { + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeBinaryPlus); + viml_pexpr_handle_bop(&ast_stack, cur_node, &want_level); + HL_CUR_TOKEN(BinaryPlus); + } + want_level = kELvlValue; + break; + } + case kExprLexParenthesis: { + if (cur_token.data.brc.closing) { + if (want_level == kELvlValue) { + if (kv_size(ast_stack) > 1) { + const ExprASTNode *const prev_top_node = *kv_Z(ast_stack, 1); + if (prev_top_node->type == kExprNodeCall) { + // Function call without arguments, this is not an error. + // But further code does not expect NULL nodes. + kv_drop(ast_stack, 1); + goto viml_pexpr_parse_no_paren_closing_error; + } + } + is_invalid = true; + ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E15: Expected value: %.*s")); + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeMissing); + cur_node->len = 0; + *top_node_p = cur_node; + } else { + // Always drop the topmost value: when want_level != kELvlValue + // topmost item on stack is a *finished* left operand, which may as + // well be "(@a)" which needs not be finished. 
+ kv_drop(ast_stack, 1); + } +viml_pexpr_parse_no_paren_closing_error: {} + ExprASTNode **new_top_node_p = NULL; + while (kv_size(ast_stack) + && (new_top_node_p == NULL + || ((*new_top_node_p)->type != kExprNodeNested + && (*new_top_node_p)->type != kExprNodeCall))) { + new_top_node_p = kv_pop(ast_stack); + } + if (new_top_node_p != NULL + && ((*new_top_node_p)->type == kExprNodeNested + || (*new_top_node_p)->type == kExprNodeCall)) { + if ((*new_top_node_p)->type == kExprNodeNested) { + HL_CUR_TOKEN(NestingParenthesis); + } else { + HL_CUR_TOKEN(CallingParenthesis); + } + } else { + // “Always drop the topmost value” branch has got rid of the single + // value stack had, so there is nothing known to enclose. Correct + // this. + if (new_top_node_p == NULL) { + new_top_node_p = top_node_p; + } + is_invalid = true; + HL_CUR_TOKEN(NestingParenthesis); + ERROR_FROM_TOKEN_AND_MSG( + cur_token, _("E15: Unexpected closing parenthesis: %.*s")); + cur_node = NEW_NODE(kExprNodeNested); + cur_node->start = cur_token.start; + cur_node->len = 0; + // Unexpected closing parenthesis, assume that it was wanted to + // enclose everything in (). + cur_node->children = *new_top_node_p; + *new_top_node_p = cur_node; + assert(cur_node->next == NULL); + } + kvi_push(ast_stack, new_top_node_p); + want_level = kELvlOperator; + } else { + if (want_level == kELvlValue) { + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeNested); + *top_node_p = cur_node; + kvi_push(ast_stack, &cur_node->children); + HL_CUR_TOKEN(NestingParenthesis); + } else if (want_level == kELvlOperator) { + if (prev_token.type == kExprLexSpacing) { + // For some reason "function (args)" is a function call, but + // "(funcref) (args)" is not. AFAIR this somehow involves + // compatibility and Bram was commenting that this is + // intentionally inconsistent and he is not very happy with the + // situation himself. 
+ if ((*top_node_p)->type != kExprNodePlainIdentifier + && (*top_node_p)->type != kExprNodeComplexIdentifier) { + OP_MISSING; + } + } + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeCall); + viml_pexpr_handle_bop(&ast_stack, cur_node, &want_level); + HL_CUR_TOKEN(CallingParenthesis); + } else { + // Currently it is impossible to reach this. + assert(false); + } + want_level = kELvlValue; + } + break; + } + } +viml_pexpr_parse_cycle_end: + prev_token = cur_token; + highlighted_prev_spacing = false; + viml_parser_advance(pstate, cur_token.len); + } while (true); +viml_pexpr_parse_end: + if (want_level == kELvlValue) { + east_set_error(&ast, pstate, _("E15: Expected value: %.*s"), pstate->pos); + } else if (kv_size(ast_stack) != 1) { + // Something may be wrong, check whether it really is. + + // Pointer to ast.root must never be dropped, so “!= 1” is expected to be + // the same as “> 1”. + assert(kv_size(ast_stack)); + // Topmost stack item must be a *finished* value, so it must not be + // analyzed. E.g. it may contain an already finished nested expression. + kv_drop(ast_stack, 1); + while (ast.correct && kv_size(ast_stack)) { + const ExprASTNode *const cur_node = (*kv_pop(ast_stack)); + // This should only happen when want_level == kELvlValue. + assert(cur_node != NULL); + switch (cur_node->type) { + case kExprNodeOpMissing: + case kExprNodeMissing: { + // Error should’ve been already reported. + break; + } + case kExprNodeCall: { + // TODO(ZyX-I): Rehighlight as invalid? + east_set_error( + &ast, pstate, + _("E116: Missing closing parenthesis for function call: %.*s"), + cur_node->start); + break; + } + case kExprNodeNested: { + // TODO(ZyX-I): Rehighlight as invalid? + east_set_error( + &ast, pstate, + _("E110: Missing closing parenthesis for nested expression" + ": %.*s"), + cur_node->start); + break; + } + case kExprNodeBinaryPlus: + case kExprNodeUnaryPlus: + case kExprNodeRegister: { + // It is OK to see these in the stack. 
+ break; + } + // TODO(ZyX-I): handle other values + } + } + } + kvi_destroy(ast_stack); + return ast; +} + +#undef NEW_NODE +#undef HL diff --git a/src/nvim/viml/parser/expressions.h b/src/nvim/viml/parser/expressions.h index 52354760a5..13888562df 100644 --- a/src/nvim/viml/parser/expressions.h +++ b/src/nvim/viml/parser/expressions.h @@ -111,6 +111,80 @@ typedef struct { } data; ///< Additional data, if needed. } LexExprToken; +/// Expression AST node type +typedef enum { + kExprNodeMissing = 'X', + kExprNodeOpMissing = '_', + kExprNodeTernary = '?', ///< Ternary operator, valid one has three children. + kExprNodeRegister = '@', ///< Register, no children. + kExprNodeSubscript = 's', ///< Subscript, should have two or three children. + kExprNodeListLiteral = 'l', ///< List literal, any number of children. + kExprNodeUnaryPlus = 'p', + kExprNodeBinaryPlus = '+', + kExprNodeNested = 'e', ///< Nested parenthesised expression. + kExprNodeCall = 'c', ///< Function call. + /// Plain identifier: simple variable/function name + /// + /// Looks like "string", "g:Foo", etc: consists of a single + /// kExprLexPlainIdentifier token. + kExprNodePlainIdentifier = 'i', + /// Complex identifier: variable/function name with curly braces + kExprNodeComplexIdentifier = 'I', +} ExprASTNodeType; + +typedef struct expr_ast_node ExprASTNode; + +/// Structure representing one AST node +struct expr_ast_node { + ExprASTNodeType type; ///< Node type. + /// Node children: e.g. for 1 + 2 nodes 1 and 2 will be children of +. + ExprASTNode *children; + /// Next node: e.g. for 1 + 2 child nodes 1 and 2 are put into a single-linked + /// list: `(+)->children` references only node 1, node 2 is in + /// `(+)->children->next`. + ExprASTNode *next; + ParserPosition start; + size_t len; + union { + struct { + int name; ///< Register name, may be -1 if name not present. + } reg; ///< For kExprNodeRegister. + } data; +}; + +typedef enum { + /// Allow multiple expressions in a row: e.g. 
for :echo + /// + /// Parser will still parse only one of them though. + kExprFlagsMulti = (1 << 0), + /// Do not treat NL, NUL and bar as EOC + /// + /// When parsing expressions input by user bar is assumed to be a binary + /// operator and other two are spacings. + kExprFlagsDisallowEOC = (1 << 1), + /// Print errors when encountered + /// + /// Without the flag they are only taken into account when parsing. + kExprFlagsPrintError = (1 << 2), +} ExprParserFlags; + +/// Structure representing complete AST for one expression +typedef struct { + /// True if represented AST is correct and can be executed. Incorrect ones may + /// still be used for completion, or in linters. + bool correct; + /// When AST is not correct this message will be printed. + /// + /// Uses `emsgf(msg, arg_len, arg);`, `msg` is assumed to contain only `%.*s`. + struct { + const char *msg; + int arg_len; + const char *arg; + } err; + /// Root node of the AST. + ExprASTNode *root; +} ExprAST; + #ifdef INCLUDE_GENERATED_DECLARATIONS # include "viml/parser/expressions.h.generated.h" #endif -- cgit From 7980614650f0aedb39bf88466e5bd3ce90429cc1 Mon Sep 17 00:00:00 2001 From: ZyX Date: Sun, 17 Sep 2017 17:33:03 +0300 Subject: viml/parser/expressions: Add support for figure braces (three kinds) --- src/nvim/viml/parser/expressions.c | 644 +++++++++++++++++++++++++++++++++---- src/nvim/viml/parser/expressions.h | 35 ++ 2 files changed, 620 insertions(+), 59 deletions(-) (limited to 'src/nvim/viml/parser') diff --git a/src/nvim/viml/parser/expressions.c b/src/nvim/viml/parser/expressions.c index b54f2eb237..f4cfed3113 100644 --- a/src/nvim/viml/parser/expressions.c +++ b/src/nvim/viml/parser/expressions.c @@ -20,10 +20,22 @@ typedef kvec_withinit_t(ExprASTNode **, 16) ExprASTStack; +/// Which nodes may be wanted typedef enum { - kELvlOperator, ///< Operators: function call, subscripts, binary operators, … - kELvlValue, ///< Actual value: literals, variables, nested expressions. 
-} ExprASTLevel; + /// Operators: function call, subscripts, binary operators, … + /// + /// For unrestricted expressions. + kENodeOperator, + /// Values: literals, variables, nested expressions, unary operators. + /// + /// For unrestricted expressions as well, implies that top item in AST stack + /// points to NULL. + kENodeValue, + /// Argument: only allows simple argument names. + kENodeArgument, + /// Argument separator: only allows commas. + kENodeArgumentSeparator, +} ExprASTWantedNode; #ifdef INCLUDE_GENERATED_DECLARATIONS # include "viml/parser/expressions.c.generated.h" @@ -456,6 +468,8 @@ viml_pexpr_next_token_adv_return: // // Used highlighting groups and assumed linkage: // +// NVimInternalError -> highlight as fg:red/bg:red +// // NVimInvalid -> Error // NVimInvalidValue -> NVimInvalid // NVimInvalidOperator -> NVimInvalid @@ -469,11 +483,33 @@ viml_pexpr_next_token_adv_return: // // NVimParenthesis -> Delimiter // +// NVimComma -> Delimiter +// NVimArrow -> Delimiter +// +// NVimLambda -> Delimiter +// NVimDict -> Delimiter +// NVimCurly -> Delimiter +// +// NVimIdentifier -> Identifier +// NVimIdentifierScope -> NVimIdentifier +// NVimIdentifierScopeDelimiter -> NVimIdentifier +// +// NVimFigureBrace -> NVimInternalError +// +// NVimInvalidComma -> NVimInvalidDelimiter // NVimInvalidSpacing -> NVimInvalid // NVimInvalidTernaryOperator -> NVimInvalidOperator // NVimInvalidRegister -> NVimInvalidValue // NVimInvalidClosingBracket -> NVimInvalidDelimiter // NVimInvalidSpacing -> NVimInvalid +// NVimInvalidArrow -> NVimInvalidDelimiter +// NVimInvalidLambda -> NVimInvalidDelimiter +// NVimInvalidDict -> NVimInvalidDelimiter +// NVimInvalidCurly -> NVimInvalidDelimiter +// NVimInvalidFigureBrace -> NVimInvalidDelimiter +// NVimInvalidIdentifier -> NVimInvalidValue +// NVimInvalidIdentifierScope -> NVimInvalidValue +// NVimInvalidIdentifierScopeDelimiter -> NVimInvalidValue // // NVimUnaryPlus -> NVimUnaryOperator // NVimBinaryPlus -> NVimBinaryOperator 
@@ -498,6 +534,9 @@ static inline ExprASTNode *viml_pexpr_new_node(const ExprASTNodeType type) typedef enum { kEOpLvlInvalid = 0, kEOpLvlParens, + kEOpLvlArrow, + kEOpLvlComma, + kEOpLvlColon, kEOpLvlTernary, kEOpLvlOr, kEOpLvlAnd, @@ -506,6 +545,7 @@ typedef enum { kEOpLvlMultiplication, ///< Multiplication, division and modulo. kEOpLvlUnary, ///< Unary operations: not, minus, plus. kEOpLvlSubscript, ///< Subscripts. + kEOpLvlComplexIdentifier, ///< Plain identifier, curly braces name. kEOpLvlValue, ///< Values: literals, variables, nested expressions, … } ExprOpLvl; @@ -520,7 +560,16 @@ static const ExprOpLvl node_type_to_op_lvl[] = { [kExprNodeOpMissing] = kEOpLvlMultiplication, [kExprNodeNested] = kEOpLvlParens, - [kExprNodeComplexIdentifier] = kEOpLvlParens, + + [kExprNodeUnknownFigure] = kEOpLvlParens, + [kExprNodeLambda] = kEOpLvlParens, + [kExprNodeDictLiteral] = kEOpLvlParens, + + [kExprNodeArrow] = kEOpLvlArrow, + + [kExprNodeComma] = kEOpLvlComma, + + [kExprNodeColon] = kEOpLvlColon, [kExprNodeTernary] = kEOpLvlTernary, @@ -531,9 +580,12 @@ static const ExprOpLvl node_type_to_op_lvl[] = { [kExprNodeSubscript] = kEOpLvlSubscript, [kExprNodeCall] = kEOpLvlSubscript, + [kExprNodeComplexIdentifier] = kEOpLvlComplexIdentifier, + [kExprNodePlainIdentifier] = kEOpLvlComplexIdentifier, + [kExprNodeCurlyBracesIdentifier] = kEOpLvlComplexIdentifier, + [kExprNodeRegister] = kEOpLvlValue, [kExprNodeListLiteral] = kEOpLvlValue, - [kExprNodePlainIdentifier] = kEOpLvlValue, }; static const ExprOpAssociativity node_type_to_op_ass[] = { @@ -541,7 +593,24 @@ static const ExprOpAssociativity node_type_to_op_ass[] = { [kExprNodeOpMissing] = kEOpAssNo, [kExprNodeNested] = kEOpAssNo, - [kExprNodeComplexIdentifier] = kEOpAssLeft, + + [kExprNodeUnknownFigure] = kEOpAssLeft, + [kExprNodeLambda] = kEOpAssNo, + [kExprNodeDictLiteral] = kEOpAssNo, + + // Does not really matter. 
+ [kExprNodeArrow] = kEOpAssNo, + + [kExprNodeColon] = kEOpAssNo, + + // Right associativity for comma because this means easier access to arguments + // list, etc: for "[a, b, c, d]" you can access "a" in one step if it is + // represented as "list(comma(a, comma(b, comma(c, d))))" than if it is + // "list(comma(comma(comma(a, b), c), d))" in which case you will need to + // traverse all three comma() structures. And with comma operator (including + // actual comma operator from C which is not present in VimL) nobody cares + // about associativity, only about order of execution. + [kExprNodeComma] = kEOpAssRight, [kExprNodeTernary] = kEOpAssNo, @@ -552,14 +621,31 @@ static const ExprOpAssociativity node_type_to_op_ass[] = { [kExprNodeSubscript] = kEOpAssLeft, [kExprNodeCall] = kEOpAssLeft, + [kExprNodePlainIdentifier] = kEOpAssLeft, + [kExprNodeComplexIdentifier] = kEOpAssLeft, + [kExprNodeCurlyBracesIdentifier] = kEOpAssLeft, + [kExprNodeRegister] = kEOpAssNo, [kExprNodeListLiteral] = kEOpAssNo, - [kExprNodePlainIdentifier] = kEOpAssNo, }; #ifdef UNIT_TESTING #include REAL_FATTR_UNUSED +static inline void viml_pexpr_debug_print_ast_node( + const ExprASTNode *const *const eastnode_p, + const char *const prefix) +{ + if (*eastnode_p == NULL) { + fprintf(stderr, "%s %p : NULL\n", prefix, (void *)eastnode_p); + } else { + fprintf(stderr, "%s %p : %p : %c : %zu:%zu:%zu\n", + prefix, (void *)eastnode_p, (void *)(*eastnode_p), + (*eastnode_p)->type, (*eastnode_p)->start.line, + (*eastnode_p)->start.col, (*eastnode_p)->len); + } +} +REAL_FATTR_UNUSED static inline void viml_pexpr_debug_print_ast_stack( const ExprASTStack *const ast_stack, const char *const msg) @@ -567,22 +653,17 @@ static inline void viml_pexpr_debug_print_ast_stack( { fprintf(stderr, "\n%sstack: %zu:\n", msg, kv_size(*ast_stack)); for (size_t i = 0; i < kv_size(*ast_stack); i++) { - const ExprASTNode *const *const eastnode_p = ( - (const ExprASTNode *const *)kv_A(*ast_stack, i)); - if (*eastnode_p == 
NULL) { - fprintf(stderr, "- %p : NULL\n", (void *)eastnode_p); - } else { - fprintf(stderr, "- %p : %p : %c : %zu:%zu:%zu\n", - (void *)eastnode_p, (void *)(*eastnode_p), (*eastnode_p)->type, - (*eastnode_p)->start.line, (*eastnode_p)->start.col, - (*eastnode_p)->len); - } + viml_pexpr_debug_print_ast_node( + (const ExprASTNode *const *)kv_A(*ast_stack, i), + "-"); } } #define PSTACK(msg) \ viml_pexpr_debug_print_ast_stack(&ast_stack, #msg) #define PSTACK_P(msg) \ viml_pexpr_debug_print_ast_stack(ast_stack, #msg) +#define PNODE_P(eastnode_p, msg) \ + viml_pexpr_debug_print_ast_node((const ExprASTNode *const *)eastnode_p, #msg) #endif /// Handle binary operator @@ -590,7 +671,7 @@ static inline void viml_pexpr_debug_print_ast_stack( /// This function is responsible for handling priority levels as well. static void viml_pexpr_handle_bop(ExprASTStack *const ast_stack, ExprASTNode *const bop_node, - ExprASTLevel *const want_level_p) + ExprASTWantedNode *const want_node_p) FUNC_ATTR_NONNULL_ALL { ExprASTNode **top_node_p = NULL; @@ -599,6 +680,9 @@ static void viml_pexpr_handle_bop(ExprASTStack *const ast_stack, ExprOpAssociativity top_node_ass; assert(kv_size(*ast_stack)); const ExprOpLvl bop_node_lvl = node_type_to_op_lvl[bop_node->type]; +#ifndef NDEBUG + const ExprOpAssociativity bop_node_ass = node_type_to_op_ass[bop_node->type]; +#endif do { ExprASTNode **new_top_node_p = kv_last(*ast_stack); ExprASTNode *new_top_node = *new_top_node_p; @@ -606,6 +690,8 @@ static void viml_pexpr_handle_bop(ExprASTStack *const ast_stack, const ExprOpLvl new_top_node_lvl = node_type_to_op_lvl[new_top_node->type]; const ExprOpAssociativity new_top_node_ass = ( node_type_to_op_ass[new_top_node->type]); + assert(bop_node_lvl != new_top_node_lvl + || bop_node_ass == new_top_node_ass); if (top_node_p != NULL && ((bop_node_lvl > new_top_node_lvl || (bop_node_lvl == new_top_node_lvl @@ -617,14 +703,60 @@ static void viml_pexpr_handle_bop(ExprASTStack *const ast_stack, top_node = 
new_top_node; top_node_lvl = new_top_node_lvl; top_node_ass = new_top_node_ass; + if (bop_node_lvl == top_node_lvl && top_node_ass == kEOpAssRight) { + break; + } } while (kv_size(*ast_stack)); - // FIXME Handle right and no associativity correctly - *top_node_p = bop_node; - bop_node->children = top_node; - assert(bop_node->children->next == NULL); - kvi_push(*ast_stack, top_node_p); - kvi_push(*ast_stack, &bop_node->children->next); - *want_level_p = kELvlValue; + // FIXME: Handle no associativity + if (top_node_ass == kEOpAssLeft || top_node_lvl != bop_node_lvl) { + // outer(op(x,y)) -> outer(new_op(op(x,y),*)) + // + // Before: top_node_p = outer(*), points to op(x,y) + // Other stack elements unknown + // + // After: top_node_p = outer(*), points to new_op(op(x,y)) + // &bop_node->children->next = new_op(op(x,y),*), points to NULL + *top_node_p = bop_node; + bop_node->children = top_node; + assert(bop_node->children->next == NULL); + kvi_push(*ast_stack, top_node_p); + kvi_push(*ast_stack, &bop_node->children->next); + } else { + assert(top_node_lvl == bop_node_lvl && top_node_ass == kEOpAssRight); + assert(top_node->children != NULL && top_node->children->next != NULL); + // outer(op(x,y)) -> outer(op(x,new_op(y,*))) + // + // Before: top_node_p = outer(*), points to op(x,y) + // Other stack elements unknown + // + // After: top_node_p = outer(*), points to op(x,new_op(y)) + // &top_node->children->next = op(x,*), points to new_op(y) + // &bop_node->children->next = new_op(y,*), points to NULL + bop_node->children = top_node->children->next; + top_node->children->next = bop_node; + assert(bop_node->children->next == NULL); + kvi_push(*ast_stack, top_node_p); + kvi_push(*ast_stack, &top_node->children->next); + kvi_push(*ast_stack, &bop_node->children->next); + } + *want_node_p = (*want_node_p == kENodeArgumentSeparator + ? 
kENodeArgument + : kENodeValue); +} + +/// ParserPosition literal based on ParserPosition pos with columns shifted +/// +/// Function does not check whether remaining position is valid. +/// +/// @param[in] pos Position to shift. +/// @param[in] shift Number of bytes to shift. +/// +/// @return Shifted position. +static inline ParserPosition shifted_pos(const ParserPosition pos, + const size_t shift) + FUNC_ATTR_CONST FUNC_ATTR_ALWAYS_INLINE FUNC_ATTR_WARN_UNUSED_RESULT +{ + return (ParserPosition) { .line = pos.line, .col = pos.col + shift }; } /// Get highlight group name @@ -692,12 +824,25 @@ static void viml_pexpr_handle_bop(ExprASTStack *const ast_stack, ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E15: Missing operator: %.*s")); \ NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeOpMissing); \ cur_node->len = 0; \ - viml_pexpr_handle_bop(&ast_stack, cur_node, &want_level); \ - is_invalid = true; \ + viml_pexpr_handle_bop(&ast_stack, cur_node, &want_node); \ goto viml_pexpr_parse_process_token; \ } \ } while (0) +/// Record missing value: for things like "* 5" +/// +/// @param[in] msg Error message. +#define ADD_VALUE_IF_MISSING(msg) \ + do { \ + if (want_node == kENodeValue) { \ + ERROR_FROM_TOKEN_AND_MSG(cur_token, (msg)); \ + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeMissing); \ + cur_node->len = 0; \ + *top_node_p = cur_node; \ + want_node = kENodeOperator; \ + } \ + } while (0) + /// Set AST error, unless AST already is not correct /// /// @param[out] ret_ast AST to set error in. 
@@ -721,14 +866,42 @@ static inline void east_set_error(ExprAST *const ret_ast, ret_ast->err.arg = pline.data + start.col; } -/// Set error from the given kExprLexInvalid token and given message +/// Set error from the given token and given message #define ERROR_FROM_TOKEN_AND_MSG(cur_token, msg) \ - east_set_error(&ast, pstate, msg, cur_token.start) + do { \ + is_invalid = true; \ + east_set_error(&ast, pstate, msg, cur_token.start); \ + } while (0) + +/// Like #ERROR_FROM_TOKEN_AND_MSG, but gets position from a node +#define ERROR_FROM_NODE_AND_MSG(node, msg) \ + do { \ + is_invalid = true; \ + east_set_error(&ast, pstate, msg, node->start); \ + } while (0) /// Set error from the given kExprLexInvalid token #define ERROR_FROM_TOKEN(cur_token) \ ERROR_FROM_TOKEN_AND_MSG(cur_token, cur_token.data.err.msg) +/// Select figure brace type, altering highlighting as well if needed +/// +/// @param[out] node Node to modify type. +/// @param[in] new_type New type, one of ExprASTNodeType values without +/// kExprNode prefix. +/// @param[in] hl Corresponding highlighting, passed as an argument to #HL. +#define SELECT_FIGURE_BRACE_TYPE(node, new_type, hl) \ + do { \ + ExprASTNode *const node_ = (node); \ + assert(node_->type == kExprNodeUnknownFigure \ + || node_->type == kExprNode##new_type); \ + node_->type = kExprNode##new_type; \ + if (pstate->colors) { \ + kv_A(*pstate->colors, node_->data.fig.opening_hl_idx).group = \ + HL(hl); \ + } \ + } while (0) + /// Parse one VimL expression /// /// @param pstate Parser state. @@ -751,13 +924,15 @@ ExprAST viml_pexpr_parse(ParserState *const pstate, const int flags) kvi_init(ast_stack); kvi_push(ast_stack, &ast.root); // Expressions stack: - // 1. *last is NULL if want_level is kExprLexValue. Indicates where expression + // 1. *last is NULL if want_node is kExprLexValue. Indicates where expression // is to be put. // 2. *last is not NULL otherwise, indicates current expression to be used as // an operator argument. 
- ExprASTLevel want_level = kELvlValue; + ExprASTWantedNode want_node = kENodeValue; LexExprToken prev_token = { .type = kExprLexMissing }; bool highlighted_prev_spacing = false; + // Lambda node, valid when parsing lambda arguments only. + ExprASTNode *lambda_node = NULL; do { LexExprToken cur_token = viml_pexpr_next_token(pstate, true); if (cur_token.type == kExprLexEOC) { @@ -789,8 +964,7 @@ ExprAST viml_pexpr_parse(ParserState *const pstate, const int flags) viml_pexpr_parse_process_token: if (tok_type == kExprLexSpacing) { if (is_invalid) { - viml_parser_highlight(pstate, cur_token.start, cur_token.len, - HL(Spacing)); + HL_CUR_TOKEN(Spacing); } else { // Do not do anything: let regular spacing be highlighted as normal. // This also allows later to highlight spacing as invalid. @@ -803,11 +977,44 @@ viml_pexpr_parse_process_token: is_invalid = false; highlighted_prev_spacing = true; } + const ParserLine pline = pstate->reader.lines.items[cur_token.start.line]; ExprASTNode **const top_node_p = kv_last(ast_stack); ExprASTNode *cur_node = NULL; - // Keep these two asserts separate for debugging purposes. - assert(want_level == kELvlValue || *top_node_p != NULL); - assert(want_level != kELvlValue || *top_node_p == NULL); + assert((want_node == kENodeValue || want_node == kENodeArgument) + == (*top_node_p == NULL)); + if ((want_node == kENodeArgumentSeparator + && tok_type != kExprLexComma + && tok_type != kExprLexArrow) + || (want_node == kENodeArgument + && !(tok_type == kExprLexPlainIdentifier + && cur_token.data.var.scope == 0 + && !cur_token.data.var.autoload) + && tok_type != kExprLexArrow)) { + lambda_node->data.fig.type_guesses.allow_lambda = false; + if (lambda_node->children != NULL + && lambda_node->children->type == kExprNodeComma) { + // If lambda has comma child this means that parser has already seen at + // least "{arg1,", so node cannot possibly be anything, but lambda. 
+ + // Vim may give E121 or E720 in this case, but it does not look right to + // have either because both are results of reevaluation possibly-lambda + // node as a dictionary and here this is not going to happen. + ERROR_FROM_TOKEN_AND_MSG( + cur_token, _("E15: Expected lambda arguments list or arrow: %.*s")); + } else { + // Else it may appear that possibly-lambda node is actually a dictionary + // or curly-braces-name identifier. + lambda_node = NULL; + if (want_node == kENodeArgumentSeparator) { + want_node = kENodeOperator; + } else { + want_node = kENodeValue; + } + } + } + assert(lambda_node == NULL + || want_node == kENodeArgumentSeparator + || want_node == kENodeArgument); switch (tok_type) { case kExprLexEOC: { assert(false); @@ -818,13 +1025,12 @@ viml_pexpr_parse_process_token: goto viml_pexpr_parse_process_token; } case kExprLexRegister: { - if (want_level == kELvlValue) { + if (want_node == kENodeValue) { NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeRegister); cur_node->data.reg.name = cur_token.data.reg.name; *top_node_p = cur_node; - want_level = kELvlOperator; - viml_parser_highlight(pstate, cur_token.start, cur_token.len, - HL(Register)); + want_node = kENodeOperator; + HL_CUR_TOKEN(Register); } else { // Register in operator position: e.g. 
@a @a OP_MISSING; @@ -832,23 +1038,343 @@ viml_pexpr_parse_process_token: break; } case kExprLexPlus: { - if (want_level == kELvlValue) { + if (want_node == kENodeValue) { // Value level: assume unary plus NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeUnaryPlus); *top_node_p = cur_node; kvi_push(ast_stack, &cur_node->children); HL_CUR_TOKEN(UnaryPlus); - } else if (want_level < kELvlValue) { + } else { NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeBinaryPlus); - viml_pexpr_handle_bop(&ast_stack, cur_node, &want_level); + viml_pexpr_handle_bop(&ast_stack, cur_node, &want_node); HL_CUR_TOKEN(BinaryPlus); } - want_level = kELvlValue; + want_node = kENodeValue; + break; + } + case kExprLexComma: { + assert(want_node != kENodeArgument); + if (want_node == kENodeValue) { + // Value level: comma appearing here is not valid. + // Note: in Vim string(,x) will give E116, this is not the case here. + ERROR_FROM_TOKEN_AND_MSG( + cur_token, _("E15: Expected value, got comma: %.*s")); + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeMissing); + cur_node->len = 0; + *top_node_p = cur_node; + want_node = (want_node == kENodeArgument + ? 
kENodeArgumentSeparator + : kENodeOperator); + } + if (want_node == kENodeArgumentSeparator) { + assert(lambda_node->data.fig.type_guesses.allow_lambda); + assert(lambda_node != NULL); + SELECT_FIGURE_BRACE_TYPE(lambda_node, Lambda, Lambda); + } + if (kv_size(ast_stack) < 2) { + goto viml_pexpr_parse_invalid_comma; + } + for (size_t i = 1; i < kv_size(ast_stack); i++) { + const ExprASTNode *const *const eastnode_p = + (const ExprASTNode *const *)kv_Z(ast_stack, i); + if (!((*eastnode_p)->type == kExprNodeComma + || ((*eastnode_p)->type == kExprNodeColon + && i == 1)) + || i == kv_size(ast_stack) - 1) { + switch ((*eastnode_p)->type) { + case kExprNodeLambda: { + assert(want_node == kENodeArgumentSeparator); + break; + } + case kExprNodeDictLiteral: + case kExprNodeListLiteral: + case kExprNodeCall: { + break; + } + default: { +viml_pexpr_parse_invalid_comma: + ERROR_FROM_TOKEN_AND_MSG( + cur_token, + _("E15: Comma outside of call, lambda or literal: %.*s")); + break; + } + } + break; + } + } + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeComma); + viml_pexpr_handle_bop(&ast_stack, cur_node, &want_node); + HL_CUR_TOKEN(Comma); + break; + } + case kExprLexColon: { + ADD_VALUE_IF_MISSING(_("E15: Expected value, got colon: %.*s")); + if (kv_size(ast_stack) < 2) { + goto viml_pexpr_parse_invalid_colon; + } + for (size_t i = 1; i < kv_size(ast_stack); i++) { + ExprASTNode *const *const eastnode_p = + (ExprASTNode *const *)kv_Z(ast_stack, i); + if ((*eastnode_p)->type != kExprNodeColon + || i == kv_size(ast_stack) - 1) { + switch ((*eastnode_p)->type) { + case kExprNodeUnknownFigure: { + SELECT_FIGURE_BRACE_TYPE((*eastnode_p), DictLiteral, Dict); + break; + } + case kExprNodeComma: + case kExprNodeDictLiteral: + case kExprNodeTernary: { + break; + } + default: { +viml_pexpr_parse_invalid_colon: + ERROR_FROM_TOKEN_AND_MSG( + cur_token, + _("E15: Colon outside of dictionary or ternary operator: " + "%.*s")); + break; + } + } + break; + } + } + NEW_NODE_WITH_CUR_POS(cur_node, 
kExprNodeColon); + viml_pexpr_handle_bop(&ast_stack, cur_node, &want_node); + // FIXME: Handle ternary operator. + HL_CUR_TOKEN(Colon); + want_node = kENodeValue; + break; + } + case kExprLexFigureBrace: { + if (cur_token.data.brc.closing) { + ExprASTNode **new_top_node_p = NULL; + // Always drop the topmost value: + // + // 1. When want_node != kENodeValue topmost item on stack is + // a *finished* left operand, which may as well be "{@a}" which + // needs not be finished again. + // 2. Otherwise it is pointing to NULL what nobody wants. + kv_drop(ast_stack, 1); + if (!kv_size(ast_stack)) { + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeUnknownFigure); + cur_node->data.fig.type_guesses.allow_lambda = false; + cur_node->data.fig.type_guesses.allow_dict = false; + cur_node->data.fig.type_guesses.allow_ident = false; + cur_node->len = 0; + if (want_node != kENodeValue) { + cur_node->children = *top_node_p; + } + *top_node_p = cur_node; + goto viml_pexpr_parse_figure_brace_closing_error; + } + if (want_node == kENodeValue) { + if ((*kv_last(ast_stack))->type != kExprNodeUnknownFigure + && (*kv_last(ast_stack))->type != kExprNodeComma) { + // kv_last being UnknownFigure may occur for empty dictionary + // literal, while Comma is expected in case of non-empty one. 
+ ERROR_FROM_TOKEN_AND_MSG( + cur_token, + _("E15: Expected value, got closing figure brace: %.*s")); + } + } else { + if (!kv_size(ast_stack)) { + new_top_node_p = top_node_p; + goto viml_pexpr_parse_figure_brace_closing_error; + } + } + do { + new_top_node_p = kv_pop(ast_stack); + } while (kv_size(ast_stack) + && (new_top_node_p == NULL + || ((*new_top_node_p)->type != kExprNodeUnknownFigure + && (*new_top_node_p)->type != kExprNodeDictLiteral + && ((*new_top_node_p)->type + != kExprNodeCurlyBracesIdentifier) + && (*new_top_node_p)->type != kExprNodeLambda))); + ExprASTNode *new_top_node = *new_top_node_p; + switch (new_top_node->type) { + case kExprNodeUnknownFigure: { + if (new_top_node->children == NULL) { + // No children of curly braces node indicates empty dictionary. + + // Should actually be kENodeArgument, but that was changed + // earlier. + assert(want_node == kENodeValue); + assert(new_top_node->data.fig.type_guesses.allow_dict); + SELECT_FIGURE_BRACE_TYPE(new_top_node, DictLiteral, Dict); + HL_CUR_TOKEN(Dict); + } else if (new_top_node->data.fig.type_guesses.allow_ident) { + SELECT_FIGURE_BRACE_TYPE(new_top_node, CurlyBracesIdentifier, + Curly); + HL_CUR_TOKEN(Curly); + } else { + // If by this time type of the node has not already been + // guessed, but it definitely is not a curly braces name then + // it is invalid for sure. + ERROR_FROM_NODE_AND_MSG( + new_top_node, + _("E15: Don't know what figure brace means: %.*s")); + if (pstate->colors) { + // Will reset to NVimInvalidFigureBrace. 
+ kv_A(*pstate->colors, + new_top_node->data.fig.opening_hl_idx).group = ( + HL(FigureBrace)); + } + HL_CUR_TOKEN(FigureBrace); + } + break; + } + case kExprNodeDictLiteral: { + HL_CUR_TOKEN(Dict); + break; + } + case kExprNodeCurlyBracesIdentifier: { + HL_CUR_TOKEN(Curly); + break; + } + case kExprNodeLambda: { + HL_CUR_TOKEN(Lambda); + break; + } + default: { +viml_pexpr_parse_figure_brace_closing_error: + assert(!kv_size(ast_stack)); + ERROR_FROM_TOKEN_AND_MSG( + cur_token, _("E15: Unexpected closing figure brace: %.*s")); + HL_CUR_TOKEN(FigureBrace); + break; + } + } + kvi_push(ast_stack, new_top_node_p); + want_node = kENodeOperator; + } else { + if (want_node == kENodeValue) { + HL_CUR_TOKEN(FigureBrace); + // Value: may be any of lambda, dictionary literal and curly braces + // name. + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeUnknownFigure); + cur_node->data.fig.type_guesses.allow_lambda = true; + cur_node->data.fig.type_guesses.allow_dict = true; + cur_node->data.fig.type_guesses.allow_ident = true; + if (pstate->colors) { + cur_node->data.fig.opening_hl_idx = kv_size(*pstate->colors) - 1; + } + *top_node_p = cur_node; + kvi_push(ast_stack, &cur_node->children); + want_node = kENodeArgument; + lambda_node = cur_node; + } else { + // Operator: may only be curly braces name, but only under certain + // conditions. + + // First condition is that there is no space before {. + if (prev_token.type == kExprLexSpacing) { + OP_MISSING; + } + switch ((*top_node_p)->type) { + // Second is that previous node is one of the identifiers: + // complex, plain, curly braces. + + // TODO(ZyX-I): Extend syntax to allow ${expr}. This is needed to + // handle environment variables like those bash uses for + // `export -f`: their names consist not only of alphanumeric + // characetrs. 
+ case kExprNodeComplexIdentifier: + case kExprNodePlainIdentifier: + case kExprNodeCurlyBracesIdentifier: { + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeComplexIdentifier); + cur_node->len = 0; + viml_pexpr_handle_bop(&ast_stack, cur_node, &want_node); + ExprASTNode *const new_top_node = *kv_last(ast_stack); + assert(new_top_node->next == NULL); + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeCurlyBracesIdentifier); + new_top_node->next = cur_node; + kvi_push(ast_stack, &cur_node->children); + HL_CUR_TOKEN(Curly); + break; + } + default: { + OP_MISSING; + break; + } + } + } + } + break; + } + case kExprLexArrow: { + if (want_node == kENodeArgumentSeparator + || want_node == kENodeArgument) { + if (want_node == kENodeArgument) { + kv_drop(ast_stack, 1); + } + assert(kv_size(ast_stack) >= 1); + while ((*kv_last(ast_stack))->type != kExprNodeLambda + && (*kv_last(ast_stack))->type != kExprNodeUnknownFigure) { + kv_drop(ast_stack, 1); + } + assert((*kv_last(ast_stack)) == lambda_node); + SELECT_FIGURE_BRACE_TYPE(lambda_node, Lambda, Lambda); + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeArrow); + if (lambda_node->children == NULL) { + assert(want_node == kENodeArgument); + lambda_node->children = cur_node; + kvi_push(ast_stack, &lambda_node->children); + } else { + assert(lambda_node->children->next == NULL); + lambda_node->children->next = cur_node; + kvi_push(ast_stack, &lambda_node->children->next); + } + kvi_push(ast_stack, &cur_node->children); + lambda_node = NULL; + } else { + // Only first branch is valid. + is_invalid = true; + ADD_VALUE_IF_MISSING(_("E15: Unexpected arrow: %.*s")); + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeArrow); + viml_pexpr_handle_bop(&ast_stack, cur_node, &want_node); + } + want_node = kENodeValue; + HL_CUR_TOKEN(Arrow); + break; + } + case kExprLexPlainIdentifier: { + if (want_node == kENodeValue || want_node == kENodeArgument) { + want_node = (want_node == kENodeArgument + ? 
kENodeArgumentSeparator + : kENodeOperator); + // FIXME: It is not valid to have scope inside complex identifier, + // check that. + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodePlainIdentifier); + cur_node->data.var.scope = cur_token.data.var.scope; + const size_t scope_shift = (cur_token.data.var.scope == 0 + ? 0 + : 2); + cur_node->data.var.ident = (pline.data + cur_token.start.col + + scope_shift); + cur_node->data.var.ident_len = cur_token.len - scope_shift; + *top_node_p = cur_node; + if (scope_shift) { + viml_parser_highlight(pstate, cur_token.start, 1, + HL(IdentifierScope)); + viml_parser_highlight(pstate, shifted_pos(cur_token.start, 1), 1, + HL(IdentifierScopeDelimiter)); + } + if (scope_shift < cur_token.len) { + viml_parser_highlight(pstate, shifted_pos(cur_token.start, + scope_shift), + cur_token.len - scope_shift, + HL(Identifier)); + } + } else { + OP_MISSING; + } break; } case kExprLexParenthesis: { if (cur_token.data.brc.closing) { - if (want_level == kELvlValue) { + if (want_node == kENodeValue) { if (kv_size(ast_stack) > 1) { const ExprASTNode *const prev_top_node = *kv_Z(ast_stack, 1); if (prev_top_node->type == kExprNodeCall) { @@ -858,15 +1384,15 @@ viml_pexpr_parse_process_token: goto viml_pexpr_parse_no_paren_closing_error; } } - is_invalid = true; - ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E15: Expected value: %.*s")); + ERROR_FROM_TOKEN_AND_MSG( + cur_token, _("E15: Expected value, got parenthesis: %.*s")); NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeMissing); cur_node->len = 0; *top_node_p = cur_node; } else { - // Always drop the topmost value: when want_level != kELvlValue + // Always drop the topmost value: when want_node != kENodeValue // topmost item on stack is a *finished* left operand, which may as - // well be "(@a)" which needs not be finished. + // well be "(@a)" which needs not be finished again. 
kv_drop(ast_stack, 1); } viml_pexpr_parse_no_paren_closing_error: {} @@ -892,10 +1418,9 @@ viml_pexpr_parse_no_paren_closing_error: {} if (new_top_node_p == NULL) { new_top_node_p = top_node_p; } - is_invalid = true; - HL_CUR_TOKEN(NestingParenthesis); ERROR_FROM_TOKEN_AND_MSG( cur_token, _("E15: Unexpected closing parenthesis: %.*s")); + HL_CUR_TOKEN(NestingParenthesis); cur_node = NEW_NODE(kExprNodeNested); cur_node->start = cur_token.start; cur_node->len = 0; @@ -906,14 +1431,14 @@ viml_pexpr_parse_no_paren_closing_error: {} assert(cur_node->next == NULL); } kvi_push(ast_stack, new_top_node_p); - want_level = kELvlOperator; + want_node = kENodeOperator; } else { - if (want_level == kELvlValue) { + if (want_node == kENodeValue) { NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeNested); *top_node_p = cur_node; kvi_push(ast_stack, &cur_node->children); HL_CUR_TOKEN(NestingParenthesis); - } else if (want_level == kELvlOperator) { + } else if (want_node == kENodeOperator) { if (prev_token.type == kExprLexSpacing) { // For some reason "function (args)" is a function call, but // "(funcref) (args)" is not. AFAIR this somehow involves @@ -926,13 +1451,13 @@ viml_pexpr_parse_no_paren_closing_error: {} } } NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeCall); - viml_pexpr_handle_bop(&ast_stack, cur_node, &want_level); + viml_pexpr_handle_bop(&ast_stack, cur_node, &want_node); HL_CUR_TOKEN(CallingParenthesis); } else { // Currently it is impossible to reach this. 
assert(false); } - want_level = kELvlValue; + want_node = kENodeValue; } break; } @@ -943,8 +1468,9 @@ viml_pexpr_parse_cycle_end: viml_parser_advance(pstate, cur_token.len); } while (true); viml_pexpr_parse_end: - if (want_level == kELvlValue) { - east_set_error(&ast, pstate, _("E15: Expected value: %.*s"), pstate->pos); + if (want_node == kENodeValue) { + east_set_error(&ast, pstate, _("E15: Expected value, got EOC: %.*s"), + pstate->pos); } else if (kv_size(ast_stack) != 1) { // Something may be wrong, check whether it really is. @@ -956,7 +1482,7 @@ viml_pexpr_parse_end: kv_drop(ast_stack, 1); while (ast.correct && kv_size(ast_stack)) { const ExprASTNode *const cur_node = (*kv_pop(ast_stack)); - // This should only happen when want_level == kELvlValue. + // This should only happen when want_node == kENodeValue. assert(cur_node != NULL); switch (cur_node->type) { case kExprNodeOpMissing: diff --git a/src/nvim/viml/parser/expressions.h b/src/nvim/viml/parser/expressions.h index 13888562df..13640ec137 100644 --- a/src/nvim/viml/parser/expressions.h +++ b/src/nvim/viml/parser/expressions.h @@ -2,6 +2,7 @@ #define NVIM_VIML_PARSER_EXPRESSIONS_H #include +#include #include #include "nvim/types.h" @@ -130,6 +131,17 @@ typedef enum { kExprNodePlainIdentifier = 'i', /// Complex identifier: variable/function name with curly braces kExprNodeComplexIdentifier = 'I', + /// Figure brace expression which is not yet known + /// + /// May resolve to any of kExprNodeDictLiteral, kExprNodeLambda or + /// kExprNodeCurlyBracesIdentifier. + kExprNodeUnknownFigure = '{', + kExprNodeLambda = '\\', ///< Lambda. + kExprNodeDictLiteral = 'd', ///< Dictionary literal. + kExprNodeCurlyBracesIdentifier= '}', ///< Part of the curly braces name. + kExprNodeComma = ',', ///< Comma “operator”. + kExprNodeColon = ':', ///< Colon “operator”. + kExprNodeArrow = '>', ///< Arrow “operator”. 
} ExprASTNodeType; typedef struct expr_ast_node ExprASTNode; @@ -149,6 +161,27 @@ struct expr_ast_node { struct { int name; ///< Register name, may be -1 if name not present. } reg; ///< For kExprNodeRegister. + struct { + /// Which nodes UnknownFigure can’t possibly represent. + struct { + /// True if UnknownFigure may actually represent dictionary literal. + bool allow_dict; + /// True if UnknownFigure may actually represent lambda. + bool allow_lambda; + /// True if UnknownFigure may actually be part of curly braces name. + bool allow_ident; + } type_guesses; + /// Highlight chunk index, used for rehighlighting if needed + size_t opening_hl_idx; + } fig; ///< For kExprNodeUnknownFigure. + struct { + int scope; ///< Scope character or 0 if not present. + /// Actual identifier without scope. + /// + /// Points to inside parser reader state. + const char *ident; + size_t ident_len; ///< Actual identifier length. + } var; } data; }; @@ -166,6 +199,8 @@ enum { /// /// Without the flag they are only taken into account when parsing. kExprFlagsPrintError = (1 << 2), + // WARNING: whenever you add a new flag, alter klee_assume() statement in + // viml_expressions_parser.c. 
} ExprParserFlags; /// Structure representing complety AST for one expression -- cgit From d4782fb1ca05e76095086bdcbc8dcea47f532d00 Mon Sep 17 00:00:00 2001 From: ZyX Date: Tue, 26 Sep 2017 00:28:34 +0300 Subject: viml/parser/expressions: Make commas actually work when calling --- src/nvim/viml/parser/expressions.c | 99 +++++++++++++++++++++----------------- 1 file changed, 55 insertions(+), 44 deletions(-) (limited to 'src/nvim/viml/parser') diff --git a/src/nvim/viml/parser/expressions.c b/src/nvim/viml/parser/expressions.c index f4cfed3113..b9abf4a067 100644 --- a/src/nvim/viml/parser/expressions.c +++ b/src/nvim/viml/parser/expressions.c @@ -395,6 +395,43 @@ viml_pexpr_next_token_adv_return: return ret; } +#ifdef UNIT_TESTING +#include +REAL_FATTR_UNUSED +static inline void viml_pexpr_debug_print_ast_node( + const ExprASTNode *const *const eastnode_p, + const char *const prefix) +{ + if (*eastnode_p == NULL) { + fprintf(stderr, "%s %p : NULL\n", prefix, (void *)eastnode_p); + } else { + fprintf(stderr, "%s %p : %p : %c : %zu:%zu:%zu\n", + prefix, (void *)eastnode_p, (void *)(*eastnode_p), + (*eastnode_p)->type, (*eastnode_p)->start.line, + (*eastnode_p)->start.col, (*eastnode_p)->len); + } +} +REAL_FATTR_UNUSED +static inline void viml_pexpr_debug_print_ast_stack( + const ExprASTStack *const ast_stack, + const char *const msg) + FUNC_ATTR_NONNULL_ALL FUNC_ATTR_ALWAYS_INLINE +{ + fprintf(stderr, "\n%sstack: %zu:\n", msg, kv_size(*ast_stack)); + for (size_t i = 0; i < kv_size(*ast_stack); i++) { + viml_pexpr_debug_print_ast_node( + (const ExprASTNode *const *)kv_A(*ast_stack, i), + "-"); + } +} +#define PSTACK(msg) \ + viml_pexpr_debug_print_ast_stack(&ast_stack, #msg) +#define PSTACK_P(msg) \ + viml_pexpr_debug_print_ast_stack(ast_stack, #msg) +#define PNODE_P(eastnode_p, msg) \ + viml_pexpr_debug_print_ast_node((const ExprASTNode *const *)ast_stack, #msg) +#endif + // start = s ternary_expr s EOC // ternary_expr = binop_expr // ( s Question s ternary_expr s 
Colon s ternary_expr s )? @@ -560,6 +597,9 @@ static const ExprOpLvl node_type_to_op_lvl[] = { [kExprNodeOpMissing] = kEOpLvlMultiplication, [kExprNodeNested] = kEOpLvlParens, + // Note: it is kEOpLvlSubscript for “binary operator” itself, but + // kEOpLvlParens when it comes to inside the parenthesis. + [kExprNodeCall] = kEOpLvlParens, [kExprNodeUnknownFigure] = kEOpLvlParens, [kExprNodeLambda] = kEOpLvlParens, @@ -578,7 +618,6 @@ static const ExprOpLvl node_type_to_op_lvl[] = { [kExprNodeUnaryPlus] = kEOpLvlUnary, [kExprNodeSubscript] = kEOpLvlSubscript, - [kExprNodeCall] = kEOpLvlSubscript, [kExprNodeComplexIdentifier] = kEOpLvlComplexIdentifier, [kExprNodePlainIdentifier] = kEOpLvlComplexIdentifier, @@ -593,6 +632,7 @@ static const ExprOpAssociativity node_type_to_op_ass[] = { [kExprNodeOpMissing] = kEOpAssNo, [kExprNodeNested] = kEOpAssNo, + [kExprNodeCall] = kEOpAssNo, [kExprNodeUnknownFigure] = kEOpAssLeft, [kExprNodeLambda] = kEOpAssNo, @@ -619,7 +659,6 @@ static const ExprOpAssociativity node_type_to_op_ass[] = { [kExprNodeUnaryPlus] = kEOpAssNo, [kExprNodeSubscript] = kEOpAssLeft, - [kExprNodeCall] = kEOpAssLeft, [kExprNodePlainIdentifier] = kEOpAssLeft, [kExprNodeComplexIdentifier] = kEOpAssLeft, @@ -629,43 +668,6 @@ static const ExprOpAssociativity node_type_to_op_ass[] = { [kExprNodeListLiteral] = kEOpAssNo, }; -#ifdef UNIT_TESTING -#include -REAL_FATTR_UNUSED -static inline void viml_pexpr_debug_print_ast_node( - const ExprASTNode *const *const eastnode_p, - const char *const prefix) -{ - if (*eastnode_p == NULL) { - fprintf(stderr, "%s %p : NULL\n", prefix, (void *)eastnode_p); - } else { - fprintf(stderr, "%s %p : %p : %c : %zu:%zu:%zu\n", - prefix, (void *)eastnode_p, (void *)(*eastnode_p), - (*eastnode_p)->type, (*eastnode_p)->start.line, - (*eastnode_p)->start.col, (*eastnode_p)->len); - } -} -REAL_FATTR_UNUSED -static inline void viml_pexpr_debug_print_ast_stack( - const ExprASTStack *const ast_stack, - const char *const msg) - 
FUNC_ATTR_NONNULL_ALL FUNC_ATTR_ALWAYS_INLINE -{ - fprintf(stderr, "\n%sstack: %zu:\n", msg, kv_size(*ast_stack)); - for (size_t i = 0; i < kv_size(*ast_stack); i++) { - viml_pexpr_debug_print_ast_node( - (const ExprASTNode *const *)kv_A(*ast_stack, i), - "-"); - } -} -#define PSTACK(msg) \ - viml_pexpr_debug_print_ast_stack(&ast_stack, #msg) -#define PSTACK_P(msg) \ - viml_pexpr_debug_print_ast_stack(ast_stack, #msg) -#define PNODE_P(eastnode_p, msg) \ - viml_pexpr_debug_print_ast_node((const ExprASTNode *const *)ast_stack, #msg) -#endif - /// Handle binary operator /// /// This function is responsible for handling priority levels as well. @@ -679,17 +681,24 @@ static void viml_pexpr_handle_bop(ExprASTStack *const ast_stack, ExprOpLvl top_node_lvl; ExprOpAssociativity top_node_ass; assert(kv_size(*ast_stack)); - const ExprOpLvl bop_node_lvl = node_type_to_op_lvl[bop_node->type]; +#define NODE_LVL(typ) \ + (bop_node->type == kExprNodeCall && typ == kExprNodeCall \ + ? kEOpLvlSubscript \ + : node_type_to_op_lvl[typ]) +#define NODE_ASS(typ) \ + (bop_node->type == kExprNodeCall && typ == kExprNodeCall \ + ? 
kEOpAssLeft \ + : node_type_to_op_ass[typ]) + const ExprOpLvl bop_node_lvl = NODE_LVL(bop_node->type); #ifndef NDEBUG - const ExprOpAssociativity bop_node_ass = node_type_to_op_ass[bop_node->type]; + const ExprOpAssociativity bop_node_ass = NODE_ASS(bop_node->type); #endif do { ExprASTNode **new_top_node_p = kv_last(*ast_stack); ExprASTNode *new_top_node = *new_top_node_p; assert(new_top_node != NULL); - const ExprOpLvl new_top_node_lvl = node_type_to_op_lvl[new_top_node->type]; - const ExprOpAssociativity new_top_node_ass = ( - node_type_to_op_ass[new_top_node->type]); + const ExprOpLvl new_top_node_lvl = NODE_LVL(new_top_node->type); + const ExprOpAssociativity new_top_node_ass = NODE_ASS(new_top_node->type); assert(bop_node_lvl != new_top_node_lvl || bop_node_ass == new_top_node_ass); if (top_node_p != NULL @@ -742,6 +751,8 @@ static void viml_pexpr_handle_bop(ExprASTStack *const ast_stack, *want_node_p = (*want_node_p == kENodeArgumentSeparator ? kENodeArgument : kENodeValue); +#undef NODE_ASS +#undef NODE_LVL } /// ParserPosition literal based on ParserPosition pos with columns shifted -- cgit From 3cc65ac054976ef7520f0247b430ebef2f9537b7 Mon Sep 17 00:00:00 2001 From: ZyX Date: Tue, 26 Sep 2017 00:52:40 +0300 Subject: viml/parser/expressions: Make commas actually work when calling --- src/nvim/viml/parser/expressions.c | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) (limited to 'src/nvim/viml/parser') diff --git a/src/nvim/viml/parser/expressions.c b/src/nvim/viml/parser/expressions.c index b9abf4a067..7bee779c49 100644 --- a/src/nvim/viml/parser/expressions.c +++ b/src/nvim/viml/parser/expressions.c @@ -681,24 +681,22 @@ static void viml_pexpr_handle_bop(ExprASTStack *const ast_stack, ExprOpLvl top_node_lvl; ExprOpAssociativity top_node_ass; assert(kv_size(*ast_stack)); -#define NODE_LVL(typ) \ - (bop_node->type == kExprNodeCall && typ == kExprNodeCall \ - ? 
kEOpLvlSubscript \ - : node_type_to_op_lvl[typ]) -#define NODE_ASS(typ) \ - (bop_node->type == kExprNodeCall && typ == kExprNodeCall \ - ? kEOpAssLeft \ - : node_type_to_op_ass[typ]) - const ExprOpLvl bop_node_lvl = NODE_LVL(bop_node->type); + const ExprOpLvl bop_node_lvl = (bop_node->type == kExprNodeCall + ? kEOpLvlSubscript + : node_type_to_op_lvl[bop_node->type]); #ifndef NDEBUG - const ExprOpAssociativity bop_node_ass = NODE_ASS(bop_node->type); + const ExprOpAssociativity bop_node_ass = ( + bop_node->type == kExprNodeCall + ? kEOpAssLeft + : node_type_to_op_ass[bop_node->type]); #endif do { ExprASTNode **new_top_node_p = kv_last(*ast_stack); ExprASTNode *new_top_node = *new_top_node_p; assert(new_top_node != NULL); - const ExprOpLvl new_top_node_lvl = NODE_LVL(new_top_node->type); - const ExprOpAssociativity new_top_node_ass = NODE_ASS(new_top_node->type); + const ExprOpLvl new_top_node_lvl = node_type_to_op_lvl[new_top_node->type]; + const ExprOpAssociativity new_top_node_ass = ( + node_type_to_op_ass[new_top_node->type]); assert(bop_node_lvl != new_top_node_lvl || bop_node_ass == new_top_node_ass); if (top_node_p != NULL @@ -751,8 +749,6 @@ static void viml_pexpr_handle_bop(ExprASTStack *const ast_stack, *want_node_p = (*want_node_p == kENodeArgumentSeparator ? 
kENodeArgument : kENodeValue); -#undef NODE_ASS -#undef NODE_LVL } /// ParserPosition literal based on ParserPosition pos with columns shifted -- cgit From 0987d3b10f36202e9f0289b50298e69aaf2fa4d2 Mon Sep 17 00:00:00 2001 From: ZyX Date: Tue, 26 Sep 2017 01:22:13 +0300 Subject: viml/parser/expressions: Make curly braces name actually work --- src/nvim/viml/parser/expressions.c | 116 +++++++++++++++++++++++++------------ 1 file changed, 78 insertions(+), 38 deletions(-) (limited to 'src/nvim/viml/parser') diff --git a/src/nvim/viml/parser/expressions.c b/src/nvim/viml/parser/expressions.c index 7bee779c49..cabf2dac58 100644 --- a/src/nvim/viml/parser/expressions.c +++ b/src/nvim/viml/parser/expressions.c @@ -909,6 +909,55 @@ static inline void east_set_error(ExprAST *const ret_ast, } \ } while (0) +/// Add identifier which should constitute complex identifier node +/// +/// This one is to be called only in case want_node is kENodeOperator. +/// +/// @param new_ident_node_code Code used to create a new identifier node and +/// update want_node and ast_stack, without +/// a trailing semicolon. +/// @param hl Highlighting name to use, passed as an argument to #HL. +#define ADD_IDENT(new_ident_node_code, hl) \ + do { \ + assert(want_node == kENodeOperator); \ + /* Operator: may only be curly braces name, but only under certain */ \ + /* conditions. */ \ +\ + /* First condition is that there is no space before a part of complex */ \ + /* identifier. */ \ + if (prev_token.type == kExprLexSpacing) { \ + OP_MISSING; \ + } \ + switch ((*top_node_p)->type) { \ + /* Second is that previous node is one of the identifiers: */ \ + /* complex, plain, curly braces. */ \ +\ + /* TODO(ZyX-I): Extend syntax to allow ${expr}. This is needed to */ \ + /* handle environment variables like those bash uses for */ \ + /* `export -f`: their names consist not only of alphanumeric */ \ + /* characetrs. 
*/ \ + case kExprNodeComplexIdentifier: \ + case kExprNodePlainIdentifier: \ + case kExprNodeCurlyBracesIdentifier: { \ + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeComplexIdentifier); \ + cur_node->len = 0; \ + cur_node->children = *top_node_p; \ + *top_node_p = cur_node; \ + kvi_push(ast_stack, &cur_node->children->next); \ + ExprASTNode **const new_top_node_p = kv_last(ast_stack); \ + assert(*new_top_node_p == NULL); \ + new_ident_node_code; \ + *new_top_node_p = cur_node; \ + HL_CUR_TOKEN(hl); \ + break; \ + } \ + default: { \ + OP_MISSING; \ + break; \ + } \ + } \ + } while (0) + /// Parse one VimL expression /// /// @param pstate Parser state. @@ -1272,40 +1321,18 @@ viml_pexpr_parse_figure_brace_closing_error: want_node = kENodeArgument; lambda_node = cur_node; } else { - // Operator: may only be curly braces name, but only under certain - // conditions. - - // First condition is that there is no space before {. - if (prev_token.type == kExprLexSpacing) { - OP_MISSING; - } - switch ((*top_node_p)->type) { - // Second is that previous node is one of the identifiers: - // complex, plain, curly braces. - - // TODO(ZyX-I): Extend syntax to allow ${expr}. This is needed to - // handle environment variables like those bash uses for - // `export -f`: their names consist not only of alphanumeric - // characetrs. 
- case kExprNodeComplexIdentifier: - case kExprNodePlainIdentifier: - case kExprNodeCurlyBracesIdentifier: { - NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeComplexIdentifier); - cur_node->len = 0; - viml_pexpr_handle_bop(&ast_stack, cur_node, &want_node); - ExprASTNode *const new_top_node = *kv_last(ast_stack); - assert(new_top_node->next == NULL); - NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeCurlyBracesIdentifier); - new_top_node->next = cur_node; - kvi_push(ast_stack, &cur_node->children); - HL_CUR_TOKEN(Curly); - break; - } - default: { - OP_MISSING; - break; - } - } + ADD_IDENT( + do { + NEW_NODE_WITH_CUR_POS(cur_node, + kExprNodeCurlyBracesIdentifier); + cur_node->data.fig.opening_hl_idx = kv_size(*pstate->colors); + cur_node->data.fig.type_guesses.allow_lambda = false; + cur_node->data.fig.type_guesses.allow_dict = false; + cur_node->data.fig.type_guesses.allow_ident = true; + kvi_push(ast_stack, &cur_node->children); + want_node = kENodeValue; + } while (0), + Curly); } } break; @@ -1351,8 +1378,6 @@ viml_pexpr_parse_figure_brace_closing_error: want_node = (want_node == kENodeArgument ? kENodeArgumentSeparator : kENodeOperator); - // FIXME: It is not valid to have scope inside complex identifier, - // check that. NEW_NODE_WITH_CUR_POS(cur_node, kExprNodePlainIdentifier); cur_node->data.var.scope = cur_token.data.var.scope; const size_t scope_shift = (cur_token.data.var.scope == 0 @@ -1374,8 +1399,22 @@ viml_pexpr_parse_figure_brace_closing_error: cur_token.len - scope_shift, HL(Identifier)); } + // FIXME: Actually, g{foo}g:foo is valid: "1?g{foo}g:foo" is like + // "g{foo}g" and not an error. 
} else { - OP_MISSING; + if (cur_token.data.var.scope == 0) { + ADD_IDENT( + do { + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodePlainIdentifier); + cur_node->data.var.scope = cur_token.data.var.scope; + cur_node->data.var.ident = pline.data + cur_token.start.col; + cur_node->data.var.ident_len = cur_token.len; + want_node = kENodeOperator; + } while (0), + Identifier); + } else { + OP_MISSING; + } } break; } @@ -1453,7 +1492,8 @@ viml_pexpr_parse_no_paren_closing_error: {} // intentionally inconsistent and he is not very happy with the // situation himself. if ((*top_node_p)->type != kExprNodePlainIdentifier - && (*top_node_p)->type != kExprNodeComplexIdentifier) { + && (*top_node_p)->type != kExprNodeComplexIdentifier + && (*top_node_p)->type != kExprNodeCurlyBracesIdentifier) { OP_MISSING; } } -- cgit From 9fa8f7fc0a24371f7956450d840bdae8a2fc9a51 Mon Sep 17 00:00:00 2001 From: ZyX Date: Thu, 28 Sep 2017 00:40:25 +0300 Subject: viml/parser/expressions: Add a way to adjust lexer It also adds support for kExprLexOr which for some reason was forgotten. It was only made sure that KLEE test compiles in non-KLEE mode, not that something works or that KLEE is able to run tests. --- src/nvim/viml/parser/expressions.c | 105 +++++++++++++++++++++++++------------ src/nvim/viml/parser/expressions.h | 28 ++++++++++ 2 files changed, 100 insertions(+), 33 deletions(-) (limited to 'src/nvim/viml/parser') diff --git a/src/nvim/viml/parser/expressions.c b/src/nvim/viml/parser/expressions.c index cabf2dac58..3027c0046b 100644 --- a/src/nvim/viml/parser/expressions.c +++ b/src/nvim/viml/parser/expressions.c @@ -47,10 +47,10 @@ typedef enum { /// Get next token for the VimL expression input /// /// @param pstate Parser state. -/// @param[in] peek If true, do not advance pstate cursor. +/// @param[in] flags Flags, @see LexExprFlags. /// /// @return Next token. 
-LexExprToken viml_pexpr_next_token(ParserState *const pstate, const bool peek) +LexExprToken viml_pexpr_next_token(ParserState *const pstate, const int flags) FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL { LexExprToken ret = { @@ -153,12 +153,33 @@ LexExprToken viml_pexpr_next_token(ParserState *const pstate, const bool peek) } // Number. - // Note: determining whether dot is (not) a part of a float needs more - // context, so lexer does not do this. - // FIXME: Resolve ambiguity by additional argument. case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': { + ret.data.num.is_float = false; CHARREG(kExprLexNumber, ascii_isdigit); + if (flags & kELFlagAllowFloat) { + if (pline.size > ret.len + 1 + && pline.data[ret.len] == '.' + && ascii_isdigit(pline.data[ret.len + 1])) { + ret.len++; + ret.data.num.is_float = true; + CHARREG(kExprLexNumber, ascii_isdigit); + if (pline.size > ret.len + 1 + && (pline.data[ret.len] == 'e' + || pline.data[ret.len] == 'E') + && ((pline.size > ret.len + 2 + && (pline.data[ret.len + 1] == '+' + || pline.data[ret.len + 1] == '-') + && ascii_isdigit(pline.data[ret.len + 2])) + || ascii_isdigit(pline.data[ret.len + 1]))) { + ret.len++; + if (pline.data[ret.len] == '+' || pline.data[ret.len] == '-') { + ret.len++; + } + CHARREG(kExprLexNumber, ascii_isdigit); + } + } + } break; } @@ -187,8 +208,9 @@ LexExprToken viml_pexpr_next_token(ParserState *const pstate, const bool peek) ret.data.var.autoload = false; CHARREG(kExprLexPlainIdentifier, ISWORD); // "is" and "isnot" operators. 
- if ((ret.len == 2 && memcmp(pline.data, "is", 2) == 0) - || (ret.len == 5 && memcmp(pline.data, "isnot", 5) == 0)) { + if (!(flags & kELFlagIsNotCmp) + && ((ret.len == 2 && memcmp(pline.data, "is", 2) == 0) + || (ret.len == 5 && memcmp(pline.data, "isnot", 5) == 0))) { ret.type = kExprLexComparison; ret.data.cmp.type = kExprLexCmpIdentical; ret.data.cmp.inv = (ret.len == 5); @@ -197,14 +219,14 @@ LexExprToken viml_pexpr_next_token(ParserState *const pstate, const bool peek) } else if (ret.len == 1 && pline.size > 1 && strchr("sgvbwtla", schar) != NULL - && pline.data[ret.len] == ':') { + && pline.data[ret.len] == ':' + && !(flags & kELFlagForbidScope)) { ret.len++; ret.data.var.scope = schar; CHARREG(kExprLexPlainIdentifier, ISWORD_OR_AUTOLOAD); ret.data.var.autoload = ( memchr(pline.data + 2, AUTOLOAD_CHAR, ret.len - 2) != NULL); - // FIXME: Resolve ambiguity with an argument to the lexer function. // Previous CHARREG stopped at autoload character in order to make it // possible to detect `is#`. Continue now with autoload characters // included. @@ -373,7 +395,30 @@ viml_pexpr_next_token_invalid_comparison: // Expression end because Ex command ended. case NUL: case NL: { - ret.type = kExprLexEOC; + if (flags & kELFlagForbidEOC) { + ret.type = kExprLexInvalid; + ret.data.err.msg = _("E15: Unexpected EOC character: %.*s"); + ret.data.err.type = kExprLexSpacing; + } else { + ret.type = kExprLexEOC; + } + break; + } + + case '|': { + if (pline.size >= 2 && pline.data[ret.len] == '|') { + // "||" is or. + ret.len++; + ret.type = kExprLexOr; + } else if (flags & kELFlagForbidEOC) { + // Note: `=1 | 2` actually yields 1 in Vim without any + // errors. This will be changed here. 
+ ret.type = kExprLexInvalid; + ret.data.err.msg = _("E15: Unexpected EOC character: %.*s"); + ret.data.err.type = kExprLexOr; + } else { + ret.type = kExprLexEOC; + } break; } @@ -389,7 +434,7 @@ viml_pexpr_next_token_invalid_comparison: } #undef GET_CCS viml_pexpr_next_token_adv_return: - if (!peek) { + if (!(flags & kELFlagPeek)) { viml_parser_advance(pstate, ret.len); } return ret; @@ -990,34 +1035,28 @@ ExprAST viml_pexpr_parse(ParserState *const pstate, const int flags) // Lambda node, valid when parsing lambda arguments only. ExprASTNode *lambda_node = NULL; do { - LexExprToken cur_token = viml_pexpr_next_token(pstate, true); + const int want_node_to_lexer_flags[] = { + [kENodeValue] = kELFlagIsNotCmp, + [kENodeOperator] = kELFlagForbidScope, + [kENodeArgument] = kELFlagIsNotCmp, + [kENodeArgumentSeparator] = kELFlagForbidScope, + }; + // FIXME Determine when (not) to allow floating-point numbers. + const int lexer_additional_flags = ( + kELFlagPeek + | ((flags & kExprFlagsDisallowEOC) ? kELFlagForbidEOC : 0)); + LexExprToken cur_token = viml_pexpr_next_token( + pstate, want_node_to_lexer_flags[want_node] | lexer_additional_flags); if (cur_token.type == kExprLexEOC) { - if (flags & kExprFlagsDisallowEOC) { - if (cur_token.len == 0) { - // It is end of string, break. - break; - } else { - // It is NL, NUL or bar. - // - // Note: `=1 | 2` actually yields 1 in Vim without any - // errors. This will be changed here. - cur_token.type = kExprLexInvalid; - cur_token.data.err.msg = _("E15: Unexpected EOC character: %.*s"); - const ParserLine pline = ( - pstate->reader.lines.items[cur_token.start.line]); - const char eoc_char = pline.data[cur_token.start.col]; - cur_token.data.err.type = ((eoc_char == NUL || eoc_char == NL) - ? 
kExprLexSpacing - : kExprLexOr); - } - } else { - break; - } + break; } LexExprTokenType tok_type = cur_token.type; const bool token_invalid = (tok_type == kExprLexInvalid); bool is_invalid = token_invalid; viml_pexpr_parse_process_token: + // May use different flags this time. + cur_token = viml_pexpr_next_token( + pstate, want_node_to_lexer_flags[want_node] | lexer_additional_flags); if (tok_type == kExprLexSpacing) { if (is_invalid) { HL_CUR_TOKEN(Spacing); diff --git a/src/nvim/viml/parser/expressions.h b/src/nvim/viml/parser/expressions.h index 13640ec137..64abab9e41 100644 --- a/src/nvim/viml/parser/expressions.h +++ b/src/nvim/viml/parser/expressions.h @@ -109,9 +109,37 @@ typedef struct { LexExprTokenType type; ///< Suggested type for parsing incorrect code. const char *msg; ///< Error message. } err; ///< For kExprLexInvalid + + struct { + bool is_float; ///< True if number is a floating-point. + } num; ///< For kExprLexNumber } data; ///< Additional data, if needed. } LexExprToken; +typedef enum { + /// If set, “pointer” to the current byte in pstate will not be shifted + kELFlagPeek = (1 << 0), + /// Determines whether scope is allowed to come before the identifier + kELFlagForbidScope = (1 << 1), + /// Determines whether floating-point numbers are allowed + /// + /// I.e. whether dot is a decimal point separator or is not a part of + /// a number at all. + kELFlagAllowFloat = (1 << 2), + /// Determines whether `is` and `isnot` are seen as comparison operators + /// + /// If set they are supposed to be just regular identifiers. + kELFlagIsNotCmp = (1 << 3), + /// Determines whether EOC tokens are allowed + /// + /// If set then it will yield Invalid token with E15 in place of EOC one if + /// “EOC” is something like "|". It is fine with emitting EOC at the end of + /// string still, with or without this flag set. + kELFlagForbidEOC = (1 << 4), + // WARNING: whenever you add a new flag, alter klee_assume() statement in + // viml_expressions_lexer.c. 
+} LexExprFlags; + /// Expression AST node type typedef enum { kExprNodeMissing = 'X', -- cgit From f33543377e39fc62ab063ca57c716984fb07aea1 Mon Sep 17 00:00:00 2001 From: ZyX Date: Sat, 30 Sep 2017 21:34:34 +0300 Subject: viml/parser/expressions: Add a way to represent tokens from C code --- src/nvim/viml/parser/expressions.c | 179 +++++++++++++++++++++++++++++++++++++ 1 file changed, 179 insertions(+) (limited to 'src/nvim/viml/parser') diff --git a/src/nvim/viml/parser/expressions.c b/src/nvim/viml/parser/expressions.c index 3027c0046b..fc64fee140 100644 --- a/src/nvim/viml/parser/expressions.c +++ b/src/nvim/viml/parser/expressions.c @@ -440,6 +440,176 @@ viml_pexpr_next_token_adv_return: return ret; } +#ifdef UNIT_TESTING +static const char *const eltkn_type_tab[] = { + [kExprLexInvalid] = "Invalid", + [kExprLexMissing] = "Missing", + [kExprLexSpacing] = "Spacing", + [kExprLexEOC] = "EOC", + + [kExprLexQuestion] = "Question", + [kExprLexColon] = "Colon", + [kExprLexOr] = "Or", + [kExprLexAnd] = "And", + [kExprLexComparison] = "Comparison", + [kExprLexPlus] = "Plus", + [kExprLexMinus] = "Minus", + [kExprLexDot] = "Dot", + [kExprLexMultiplication] = "Multiplication", + + [kExprLexNot] = "Not", + + [kExprLexNumber] = "Number", + [kExprLexSingleQuotedString] = "SingleQuotedString", + [kExprLexDoubleQuotedString] = "DoubleQuotedString", + [kExprLexOption] = "Option", + [kExprLexRegister] = "Register", + [kExprLexEnv] = "Env", + [kExprLexPlainIdentifier] = "PlainIdentifier", + + [kExprLexBracket] = "Bracket", + [kExprLexFigureBrace] = "FigureBrace", + [kExprLexParenthesis] = "Parenthesis", + [kExprLexComma] = "Comma", + [kExprLexArrow] = "Arrow", +}; + +static const char *const eltkn_cmp_type_tab[] = { + [kExprLexCmpEqual] = "Equal", + [kExprLexCmpMatches] = "Matches", + [kExprLexCmpGreater] = "Greater", + [kExprLexCmpGreaterOrEqual] = "GreaterOrEqual", + [kExprLexCmpIdentical] = "Identical", +}; + +static const char *const ccs_tab[] = { + [kCCStrategyUseOption] = 
"UseOption", + [kCCStrategyMatchCase] = "MatchCase", + [kCCStrategyIgnoreCase] = "IgnoreCase", +}; + +static const char *const eltkn_mul_type_tab[] = { + [kExprLexMulMul] = "Mul", + [kExprLexMulDiv] = "Div", + [kExprLexMulMod] = "Mod", +}; + +static const char *const eltkn_opt_scope_tab[] = { + [kExprLexOptUnspecified] = "Unspecified", + [kExprLexOptGlobal] = "Global", + [kExprLexOptLocal] = "Local", +}; + +/// Represent `int` character as a string +/// +/// Converts +/// - ASCII digits into '{digit}' +/// - ASCII printable characters into a single-character strings +/// - everything else to numbers. +/// +/// @param[in] ch Character to convert. +/// +/// @return Converted string, stored in a static buffer (overriden after each +/// call). +static const char *intchar2str(const int ch) + FUNC_ATTR_WARN_UNUSED_RESULT +{ + static char buf[sizeof(int) * 3 + 1]; + if (' ' <= ch && ch < 0x7f) { + if (ascii_isdigit(ch)) { + buf[0] = '\''; + buf[1] = (char)ch; + buf[2] = '\''; + buf[3] = NUL; + } else { + buf[0] = (char)ch; + buf[1] = NUL; + } + } else { + snprintf(buf, sizeof(buf), "%i", ch); + } + return buf; +} + +/// Represent token as a string +/// +/// Intended for testing and debugging purposes. +/// +/// @param[in] pstate Parser state, needed to get token string from it. May be +/// NULL, in which case in place of obtaining part of the +/// string represented by token only token length is +/// returned. +/// @param[in] token Token to represent. +/// @param[out] ret_size Return string size, for cases like NULs inside +/// a string. May be NULL. +/// +/// @return Token represented in a string form, in a static buffer (overwritten +/// on each call). +const char *viml_pexpr_repr_token(const ParserState *const pstate, + const LexExprToken token, + size_t *const ret_size) + FUNC_ATTR_WARN_UNUSED_RESULT +{ + static char ret[1024]; + char *p = ret; + const char *const e = &ret[1024] - 1; +#define ADDSTR(...) 
\ + do { \ + p += snprintf(p, (size_t)(sizeof(ret) - (size_t)(p - ret)), __VA_ARGS__); \ + if (p >= e) { \ + goto viml_pexpr_repr_token_end; \ + } \ + } while (0) + ADDSTR("%zu:%zu:%s", token.start.line, token.start.col, + eltkn_type_tab[token.type]); + switch (token.type) { +#define TKNARGS(tkn_type, ...) \ + case tkn_type: { \ + ADDSTR(__VA_ARGS__); \ + break; \ + } + TKNARGS(kExprLexComparison, "(type=%s,ccs=%s,inv=%i)", + eltkn_cmp_type_tab[token.data.cmp.type], + ccs_tab[token.data.cmp.ccs], + (int)token.data.cmp.inv) + TKNARGS(kExprLexMultiplication, "(type=%s)", + eltkn_mul_type_tab[token.data.mul.type]) + TKNARGS(kExprLexRegister, "(name=%s)", intchar2str(token.data.reg.name)) + case kExprLexDoubleQuotedString: + TKNARGS(kExprLexSingleQuotedString, "(closed=%i)", + (int)token.data.str.closed) + TKNARGS(kExprLexOption, "(scope=%s,name=%.*s)", + eltkn_opt_scope_tab[token.data.opt.scope], + (int)token.data.opt.len, token.data.opt.name) + TKNARGS(kExprLexPlainIdentifier, "(scope=%s,autoload=%i)", + intchar2str(token.data.var.scope), (int)token.data.var.autoload) + TKNARGS(kExprLexNumber, "(is_float=%i)", (int)token.data.num.is_float) + TKNARGS(kExprLexInvalid, "(msg=%s)", token.data.err.msg) + default: { + // No additional arguments. 
+ break; + } +#undef TKNARGS + } + if (pstate == NULL) { + ADDSTR("::%zu", token.len); + } else { + *p++ = ':'; + memmove( + p, &pstate->reader.lines.items[token.start.line].data[token.start.col], + token.len); + p += token.len; + *p = NUL; + } +#undef ADDSTR +viml_pexpr_repr_token_end: + if (ret_size != NULL) { + *ret_size = (size_t)(p - ret); + } + return ret; +} +#endif + #ifdef UNIT_TESTING #include REAL_FATTR_UNUSED @@ -469,12 +639,21 @@ static inline void viml_pexpr_debug_print_ast_stack( "-"); } } + +static inline void viml_pexpr_debug_print_token( + const ParserState *const pstate, const LexExprToken token) + FUNC_ATTR_ALWAYS_INLINE +{ + fprintf(stderr, "\ntkn: %s\n", viml_pexpr_repr_token(pstate, token, NULL)); +} #define PSTACK(msg) \ viml_pexpr_debug_print_ast_stack(&ast_stack, #msg) #define PSTACK_P(msg) \ viml_pexpr_debug_print_ast_stack(ast_stack, #msg) #define PNODE_P(eastnode_p, msg) \ viml_pexpr_debug_print_ast_node((const ExprASTNode *const *)ast_stack, #msg) +#define PTOKEN(tkn) \ + viml_pexpr_debug_print_token(pstate, tkn) #endif // start = s ternary_expr s EOC -- cgit From 3735537a508c5690c4622ebe450e6f3f15706670 Mon Sep 17 00:00:00 2001 From: ZyX Date: Sun, 1 Oct 2017 15:54:46 +0300 Subject: viml/parser/expressions: Fix call inside nested parenthesis MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It may have incorrectly tried to call everything because of essentially “value” nodes being treated as not such. 
--- src/nvim/viml/parser/expressions.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'src/nvim/viml/parser') diff --git a/src/nvim/viml/parser/expressions.c b/src/nvim/viml/parser/expressions.c index fc64fee140..1713d0c89f 100644 --- a/src/nvim/viml/parser/expressions.c +++ b/src/nvim/viml/parser/expressions.c @@ -612,6 +612,7 @@ viml_pexpr_repr_token_end: #ifdef UNIT_TESTING #include + REAL_FATTR_UNUSED static inline void viml_pexpr_debug_print_ast_node( const ExprASTNode *const *const eastnode_p, @@ -626,6 +627,7 @@ static inline void viml_pexpr_debug_print_ast_node( (*eastnode_p)->start.col, (*eastnode_p)->len); } } + REAL_FATTR_UNUSED static inline void viml_pexpr_debug_print_ast_stack( const ExprASTStack *const ast_stack, @@ -640,6 +642,7 @@ static inline void viml_pexpr_debug_print_ast_stack( } } +REAL_FATTR_UNUSED static inline void viml_pexpr_debug_print_token( const ParserState *const pstate, const LexExprToken token) FUNC_ATTR_ALWAYS_INLINE @@ -794,6 +797,7 @@ static inline ExprASTNode *viml_pexpr_new_node(const ExprASTNodeType type) typedef enum { kEOpLvlInvalid = 0, + kEOpLvlComplexIdentifier, kEOpLvlParens, kEOpLvlArrow, kEOpLvlComma, @@ -806,7 +810,6 @@ typedef enum { kEOpLvlMultiplication, ///< Multiplication, division and modulo. kEOpLvlUnary, ///< Unary operations: not, minus, plus. kEOpLvlSubscript, ///< Subscripts. - kEOpLvlComplexIdentifier, ///< Plain identifier, curly braces name. 
kEOpLvlValue, ///< Values: literals, variables, nested expressions, … } ExprOpLvl; @@ -843,10 +846,10 @@ static const ExprOpLvl node_type_to_op_lvl[] = { [kExprNodeSubscript] = kEOpLvlSubscript, - [kExprNodeComplexIdentifier] = kEOpLvlComplexIdentifier, - [kExprNodePlainIdentifier] = kEOpLvlComplexIdentifier, [kExprNodeCurlyBracesIdentifier] = kEOpLvlComplexIdentifier, + [kExprNodeComplexIdentifier] = kEOpLvlValue, + [kExprNodePlainIdentifier] = kEOpLvlValue, [kExprNodeRegister] = kEOpLvlValue, [kExprNodeListLiteral] = kEOpLvlValue, }; @@ -884,10 +887,10 @@ static const ExprOpAssociativity node_type_to_op_ass[] = { [kExprNodeSubscript] = kEOpAssLeft, - [kExprNodePlainIdentifier] = kEOpAssLeft, - [kExprNodeComplexIdentifier] = kEOpAssLeft, [kExprNodeCurlyBracesIdentifier] = kEOpAssLeft, + [kExprNodeComplexIdentifier] = kEOpAssLeft, + [kExprNodePlainIdentifier] = kEOpAssNo, [kExprNodeRegister] = kEOpAssNo, [kExprNodeListLiteral] = kEOpAssNo, }; -- cgit From 9e721031d597bfa435da03597939191970f7a918 Mon Sep 17 00:00:00 2001 From: ZyX Date: Sun, 1 Oct 2017 16:50:46 +0300 Subject: viml/parser/expressions: Fix determining invalid commas/colons --- src/nvim/viml/parser/expressions.c | 163 +++++++++++++++++++++++++------------ src/nvim/viml/parser/expressions.h | 8 +- 2 files changed, 117 insertions(+), 54 deletions(-) (limited to 'src/nvim/viml/parser') diff --git a/src/nvim/viml/parser/expressions.c b/src/nvim/viml/parser/expressions.c index 1713d0c89f..c283241cb4 100644 --- a/src/nvim/viml/parser/expressions.c +++ b/src/nvim/viml/parser/expressions.c @@ -13,6 +13,7 @@ #include "nvim/types.h" #include "nvim/charset.h" #include "nvim/ascii.h" +#include "nvim/assert.h" #include "nvim/lib/kvec.h" #include "nvim/viml/parser/expressions.h" @@ -37,6 +38,32 @@ typedef enum { kENodeArgumentSeparator, } ExprASTWantedNode; +/// Operator priority level +typedef enum { + kEOpLvlInvalid = 0, + kEOpLvlComplexIdentifier, + kEOpLvlParens, + kEOpLvlArrow, + kEOpLvlComma, + kEOpLvlColon, + 
kEOpLvlTernary, + kEOpLvlOr, + kEOpLvlAnd, + kEOpLvlComparison, + kEOpLvlAddition, ///< Addition, subtraction and concatenation. + kEOpLvlMultiplication, ///< Multiplication, division and modulo. + kEOpLvlUnary, ///< Unary operations: not, minus, plus. + kEOpLvlSubscript, ///< Subscripts. + kEOpLvlValue, ///< Values: literals, variables, nested expressions, … +} ExprOpLvl; + +/// Operator associativity +typedef enum { + kEOpAssNo= 'n', ///< Not associative / not applicable. + kEOpAssLeft = 'l', ///< Left associativity. + kEOpAssRight = 'r', ///< Right associativity. +} ExprOpAssociativity; + #ifdef INCLUDE_GENERATED_DECLARATIONS # include "viml/parser/expressions.c.generated.h" #endif @@ -747,6 +774,7 @@ static inline void viml_pexpr_debug_print_token( // // NVimParenthesis -> Delimiter // +// NVimColon -> Delimiter // NVimComma -> Delimiter // NVimArrow -> Delimiter // @@ -895,6 +923,32 @@ static const ExprOpAssociativity node_type_to_op_ass[] = { [kExprNodeListLiteral] = kEOpAssNo, }; +/// Get AST node priority level +/// +/// Used primarily to reduce line length, so keep the name short. +/// +/// @param[in] node Node to get priority for. +/// +/// @return Node priority level. +static inline ExprOpLvl node_lvl(const ExprASTNode node) + FUNC_ATTR_ALWAYS_INLINE FUNC_ATTR_CONST FUNC_ATTR_WARN_UNUSED_RESULT +{ + return node_type_to_op_lvl[node.type]; +} + +/// Get AST node associativity, to be used primarily for operator nodes +/// +/// Used primarily to reduce line length, so keep the name short. +/// +/// @param[in] node Node to get associativity for. +/// +/// @return Node associativity. +static inline ExprOpAssociativity node_ass(const ExprASTNode node) + FUNC_ATTR_ALWAYS_INLINE FUNC_ATTR_CONST FUNC_ATTR_WARN_UNUSED_RESULT +{ + return node_type_to_op_ass[node.type]; +} + /// Handle binary operator /// /// This function is responsible for handling priority levels as well.
@@ -910,20 +964,19 @@ static void viml_pexpr_handle_bop(ExprASTStack *const ast_stack, assert(kv_size(*ast_stack)); const ExprOpLvl bop_node_lvl = (bop_node->type == kExprNodeCall ? kEOpLvlSubscript - : node_type_to_op_lvl[bop_node->type]); + : node_lvl(*bop_node)); #ifndef NDEBUG const ExprOpAssociativity bop_node_ass = ( bop_node->type == kExprNodeCall ? kEOpAssLeft - : node_type_to_op_ass[bop_node->type]); + : node_ass(*bop_node)); #endif do { ExprASTNode **new_top_node_p = kv_last(*ast_stack); ExprASTNode *new_top_node = *new_top_node_p; assert(new_top_node != NULL); - const ExprOpLvl new_top_node_lvl = node_type_to_op_lvl[new_top_node->type]; - const ExprOpAssociativity new_top_node_ass = ( - node_type_to_op_ass[new_top_node->type]); + const ExprOpLvl new_top_node_lvl = node_lvl(*new_top_node); + const ExprOpAssociativity new_top_node_ass = node_ass(*new_top_node); assert(bop_node_lvl != new_top_node_lvl || bop_node_ass == new_top_node_ass); if (top_node_p != NULL @@ -1352,32 +1405,31 @@ viml_pexpr_parse_process_token: goto viml_pexpr_parse_invalid_comma; } for (size_t i = 1; i < kv_size(ast_stack); i++) { - const ExprASTNode *const *const eastnode_p = - (const ExprASTNode *const *)kv_Z(ast_stack, i); - if (!((*eastnode_p)->type == kExprNodeComma - || ((*eastnode_p)->type == kExprNodeColon - && i == 1)) - || i == kv_size(ast_stack) - 1) { - switch ((*eastnode_p)->type) { - case kExprNodeLambda: { - assert(want_node == kENodeArgumentSeparator); - break; - } - case kExprNodeDictLiteral: - case kExprNodeListLiteral: - case kExprNodeCall: { - break; - } - default: { + ExprASTNode *const *const eastnode_p = + (ExprASTNode *const *)kv_Z(ast_stack, i); + const ExprASTNodeType eastnode_type = (*eastnode_p)->type; + const ExprOpLvl eastnode_lvl = node_lvl(**eastnode_p); + if (eastnode_type == kExprNodeLambda) { + assert(want_node == kENodeArgumentSeparator); + break; + } else if (eastnode_type == kExprNodeDictLiteral + || eastnode_type == kExprNodeListLiteral + || 
eastnode_type == kExprNodeCall) { + break; + } else if (eastnode_type == kExprNodeComma + || eastnode_type == kExprNodeColon + || eastnode_lvl > kEOpLvlComma) { + // Do nothing + } else { viml_pexpr_parse_invalid_comma: - ERROR_FROM_TOKEN_AND_MSG( - cur_token, - _("E15: Comma outside of call, lambda or literal: %.*s")); - break; - } - } + ERROR_FROM_TOKEN_AND_MSG( + cur_token, + _("E15: Comma outside of call, lambda or literal: %.*s")); break; } + if (i == kv_size(ast_stack) - 1) { + goto viml_pexpr_parse_invalid_comma; + } } NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeComma); viml_pexpr_handle_bop(&ast_stack, cur_node, &want_node); @@ -1389,37 +1441,48 @@ viml_pexpr_parse_invalid_comma: if (kv_size(ast_stack) < 2) { goto viml_pexpr_parse_invalid_colon; } + bool is_ternary = false; + bool can_be_ternary = true; for (size_t i = 1; i < kv_size(ast_stack); i++) { ExprASTNode *const *const eastnode_p = (ExprASTNode *const *)kv_Z(ast_stack, i); - if ((*eastnode_p)->type != kExprNodeColon - || i == kv_size(ast_stack) - 1) { - switch ((*eastnode_p)->type) { - case kExprNodeUnknownFigure: { - SELECT_FIGURE_BRACE_TYPE((*eastnode_p), DictLiteral, Dict); - break; - } - case kExprNodeComma: - case kExprNodeDictLiteral: - case kExprNodeTernary: { - break; - } - default: { + const ExprASTNodeType eastnode_type = (*eastnode_p)->type; + const ExprOpLvl eastnode_lvl = node_lvl(**eastnode_p); + STATIC_ASSERT(kEOpLvlTernary > kEOpLvlComma, + "Unexpected operator priorities"); + if (can_be_ternary && eastnode_lvl == kEOpLvlTernary) { + assert(eastnode_type == kExprNodeTernary); + is_ternary = true; + break; + } else if (eastnode_type == kExprNodeUnknownFigure) { + SELECT_FIGURE_BRACE_TYPE(*eastnode_p, DictLiteral, Dict); + break; + } else if (eastnode_type == kExprNodeDictLiteral + || eastnode_type == kExprNodeComma) { + break; + } else if (eastnode_lvl > kEOpLvlTernary) { + // Do nothing + } else if (eastnode_lvl > kEOpLvlComma) { + can_be_ternary = false; + } else { 
viml_pexpr_parse_invalid_colon: - ERROR_FROM_TOKEN_AND_MSG( - cur_token, - _("E15: Colon outside of dictionary or ternary operator: " - "%.*s")); - break; - } - } + ERROR_FROM_TOKEN_AND_MSG( + cur_token, + _("E15: Colon outside of dictionary or ternary operator: " + "%.*s")); break; } + if (i == kv_size(ast_stack) - 1) { + goto viml_pexpr_parse_invalid_colon; + } } NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeColon); viml_pexpr_handle_bop(&ast_stack, cur_node, &want_node); - // FIXME: Handle ternary operator. - HL_CUR_TOKEN(Colon); + if (is_ternary) { + HL_CUR_TOKEN(TernaryColon); + } else { + HL_CUR_TOKEN(Colon); + } want_node = kENodeValue; break; } diff --git a/src/nvim/viml/parser/expressions.h b/src/nvim/viml/parser/expressions.h index 64abab9e41..01a51e4eda 100644 --- a/src/nvim/viml/parser/expressions.h +++ b/src/nvim/viml/parser/expressions.h @@ -144,10 +144,10 @@ typedef enum { typedef enum { kExprNodeMissing = 'X', kExprNodeOpMissing = '_', - kExprNodeTernary = '?', ///< Ternary operator, valid one has three children. - kExprNodeRegister = '@', ///< Register, no children. - kExprNodeSubscript = 's', ///< Subscript, should have two or three children. - kExprNodeListLiteral = 'l', ///< List literal, any number of children. + kExprNodeTernary = '?', ///< Ternary operator. + kExprNodeRegister = '@', ///< Register. + kExprNodeSubscript = 's', ///< Subscript. + kExprNodeListLiteral = 'l', ///< List literal. kExprNodeUnaryPlus = 'p', kExprNodeBinaryPlus = '+', kExprNodeNested = 'e', ///< Nested parenthesised expression. 
-- cgit From 6144e26eb920a90b0db22bd7afcac0b9e0734ed6 Mon Sep 17 00:00:00 2001 From: ZyX Date: Sun, 1 Oct 2017 22:35:41 +0300 Subject: viml/parser/expressions: Add support for ternary operator --- src/nvim/viml/parser/expressions.c | 87 ++++++++++++++++++++++---------------- src/nvim/viml/parser/expressions.h | 6 ++- 2 files changed, 56 insertions(+), 37 deletions(-) (limited to 'src/nvim/viml/parser') diff --git a/src/nvim/viml/parser/expressions.c b/src/nvim/viml/parser/expressions.c index c283241cb4..41c77c5c88 100644 --- a/src/nvim/viml/parser/expressions.c +++ b/src/nvim/viml/parser/expressions.c @@ -46,6 +46,7 @@ typedef enum { kEOpLvlArrow, kEOpLvlComma, kEOpLvlColon, + kEOpLvlTernaryValue, kEOpLvlTernary, kEOpLvlOr, kEOpLvlAnd, @@ -770,7 +771,8 @@ static inline void viml_pexpr_debug_print_token( // NVimUnaryOperator -> NVimOperator // NVimBinaryOperator -> NVimOperator // NVimComparisonOperator -> NVimOperator -// NVimTernaryOperator -> NVimOperator +// NVimTernary -> NVimOperator +// NVimTernaryColon -> NVimTernary // // NVimParenthesis -> Delimiter // @@ -790,7 +792,8 @@ static inline void viml_pexpr_debug_print_token( // // NVimInvalidComma -> NVimInvalidDelimiter // NVimInvalidSpacing -> NVimInvalid -// NVimInvalidTernaryOperator -> NVimInvalidOperator +// NVimInvalidTernary -> NVimInvalidOperator +// NVimInvalidTernaryColon -> NVimInvalidTernary // NVimInvalidRegister -> NVimInvalidValue // NVimInvalidClosingBracket -> NVimInvalidDelimiter // NVimInvalidSpacing -> NVimInvalid @@ -823,30 +826,6 @@ static inline ExprASTNode *viml_pexpr_new_node(const ExprASTNodeType type) return ret; } -typedef enum { - kEOpLvlInvalid = 0, - kEOpLvlComplexIdentifier, - kEOpLvlParens, - kEOpLvlArrow, - kEOpLvlComma, - kEOpLvlColon, - kEOpLvlTernary, - kEOpLvlOr, - kEOpLvlAnd, - kEOpLvlComparison, - kEOpLvlAddition, ///< Addition, subtraction and concatenation. - kEOpLvlMultiplication, ///< Multiplication, division and modulo. 
- kEOpLvlUnary, ///< Unary operations: not, minus, plus. - kEOpLvlSubscript, ///< Subscripts. - kEOpLvlValue, ///< Values: literals, variables, nested expressions, … -} ExprOpLvl; - -typedef enum { - kEOpAssNo= 'n', ///< Not associative / not applicable. - kEOpAssLeft = 'l', ///< Left associativity. - kEOpAssRight = 'r', ///< Right associativity. -} ExprOpAssociativity; - static const ExprOpLvl node_type_to_op_lvl[] = { [kExprNodeMissing] = kEOpLvlInvalid, [kExprNodeOpMissing] = kEOpLvlMultiplication, @@ -868,6 +847,8 @@ static const ExprOpLvl node_type_to_op_lvl[] = { [kExprNodeTernary] = kEOpLvlTernary, + [kExprNodeTernaryValue] = kEOpLvlTernaryValue, + [kExprNodeBinaryPlus] = kEOpLvlAddition, [kExprNodeUnaryPlus] = kEOpLvlUnary, @@ -907,7 +888,9 @@ static const ExprOpAssociativity node_type_to_op_ass[] = { // about associativity, only about order of execution. [kExprNodeComma] = kEOpAssRight, - [kExprNodeTernary] = kEOpAssNo, + [kExprNodeTernary] = kEOpAssRight, + + [kExprNodeTernaryValue] = kEOpAssRight, [kExprNodeBinaryPlus] = kEOpAssLeft, @@ -1450,9 +1433,20 @@ viml_pexpr_parse_invalid_comma: const ExprOpLvl eastnode_lvl = node_lvl(**eastnode_p); STATIC_ASSERT(kEOpLvlTernary > kEOpLvlComma, "Unexpected operator priorities"); - if (can_be_ternary && eastnode_lvl == kEOpLvlTernary) { - assert(eastnode_type == kExprNodeTernary); + if (can_be_ternary && eastnode_type == kExprNodeTernaryValue + && !(*eastnode_p)->data.ter.got_colon) { + kv_drop(ast_stack, i); + (*eastnode_p)->start = cur_token.start; + (*eastnode_p)->len = cur_token.len; + if (prev_token.type == kExprLexSpacing) { + (*eastnode_p)->start = prev_token.start; + (*eastnode_p)->len += prev_token.len; + } is_ternary = true; + (*eastnode_p)->data.ter.got_colon = true; + assert((*eastnode_p)->children != NULL); + assert((*eastnode_p)->children->next == NULL); + kvi_push(ast_stack, &(*eastnode_p)->children->next); break; } else if (eastnode_type == kExprNodeUnknownFigure) { 
SELECT_FIGURE_BRACE_TYPE(*eastnode_p, DictLiteral, Dict); @@ -1460,7 +1454,7 @@ viml_pexpr_parse_invalid_comma: } else if (eastnode_type == kExprNodeDictLiteral || eastnode_type == kExprNodeComma) { break; - } else if (eastnode_lvl > kEOpLvlTernary) { + } else if (eastnode_lvl >= kEOpLvlTernaryValue) { // Do nothing } else if (eastnode_lvl > kEOpLvlComma) { can_be_ternary = false; @@ -1476,11 +1470,11 @@ viml_pexpr_parse_invalid_colon: goto viml_pexpr_parse_invalid_colon; } } - NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeColon); - viml_pexpr_handle_bop(&ast_stack, cur_node, &want_node); if (is_ternary) { HL_CUR_TOKEN(TernaryColon); } else { + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeColon); + viml_pexpr_handle_bop(&ast_stack, cur_node, &want_node); HL_CUR_TOKEN(Colon); } want_node = kENodeValue; @@ -1683,8 +1677,6 @@ viml_pexpr_parse_figure_brace_closing_error: cur_token.len - scope_shift, HL(Identifier)); } - // FIXME: Actually, g{foo}g:foo is valid: "1?g{foo}g:foo" is like - // "g{foo}g" and not an error. } else { if (cur_token.data.var.scope == 0) { ADD_IDENT( @@ -1792,6 +1784,21 @@ viml_pexpr_parse_no_paren_closing_error: {} } break; } + case kExprLexQuestion: { + ADD_VALUE_IF_MISSING(_("E15: Expected value, got question mark: %.*s")); + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeTernary); + viml_pexpr_handle_bop(&ast_stack, cur_node, &want_node); + HL_CUR_TOKEN(Ternary); + ExprASTNode *ter_val_node; + NEW_NODE_WITH_CUR_POS(ter_val_node, kExprNodeTernaryValue); + ter_val_node->data.ter.got_colon = false; + assert(cur_node->children != NULL); + assert(cur_node->children->next == NULL); + assert(kv_last(ast_stack) == &cur_node->children->next); + *kv_last(ast_stack) = ter_val_node; + kvi_push(ast_stack, &ter_val_node->children); + break; + } } viml_pexpr_parse_cycle_end: prev_token = cur_token; @@ -1815,6 +1822,7 @@ viml_pexpr_parse_end: const ExprASTNode *const cur_node = (*kv_pop(ast_stack)); // This should only happen when want_node == kENodeValue. 
assert(cur_node != NULL); + // TODO(ZyX-I): Rehighlight as invalid? switch (cur_node->type) { case kExprNodeOpMissing: case kExprNodeMissing: { @@ -1822,7 +1830,6 @@ viml_pexpr_parse_end: break; } case kExprNodeCall: { - // TODO(ZyX-I): Rehighlight as invalid? east_set_error( &ast, pstate, _("E116: Missing closing parenthesis for function call: %.*s"), @@ -1830,7 +1837,6 @@ viml_pexpr_parse_end: break; } case kExprNodeNested: { - // TODO(ZyX-I): Rehighlight as invalid? east_set_error( &ast, pstate, _("E110: Missing closing parenthesis for nested expression" @@ -1844,6 +1850,15 @@ viml_pexpr_parse_end: // It is OK to see these in the stack. break; } + case kExprNodeTernaryValue: { + if (!cur_node->data.ter.got_colon) { + // Actually Vim throws E109 in more cases. + east_set_error( + &ast, pstate, _("E109: Missing ':' after '?': %.*s"), + cur_node->start); + } + break; + } // TODO(ZyX-I): handle other values } } diff --git a/src/nvim/viml/parser/expressions.h b/src/nvim/viml/parser/expressions.h index 01a51e4eda..cf0907850a 100644 --- a/src/nvim/viml/parser/expressions.h +++ b/src/nvim/viml/parser/expressions.h @@ -145,6 +145,7 @@ typedef enum { kExprNodeMissing = 'X', kExprNodeOpMissing = '_', kExprNodeTernary = '?', ///< Ternary operator. + kExprNodeTernaryValue = 'C', ///< Ternary operator, colon. kExprNodeRegister = '@', ///< Register. kExprNodeSubscript = 's', ///< Subscript. kExprNodeListLiteral = 'l', ///< List literal. @@ -209,7 +210,10 @@ struct expr_ast_node { /// Points to inside parser reader state. const char *ident; size_t ident_len; ///< Actual identifier length. - } var; + } var; ///< For kExprNodePlainIdentifier. + struct { + bool got_colon; ///< True if colon was seen. + } ter; ///< For kExprNodeTernaryValue. 
} data; }; -- cgit From 6791c574209c83570746c139d93f8e6a6b9cd135 Mon Sep 17 00:00:00 2001 From: ZyX Date: Mon, 2 Oct 2017 01:22:35 +0300 Subject: viml/parser/expressions: Make sure that arrows outside lambda throw --- src/nvim/viml/parser/expressions.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/nvim/viml/parser') diff --git a/src/nvim/viml/parser/expressions.c b/src/nvim/viml/parser/expressions.c index 41c77c5c88..982465055e 100644 --- a/src/nvim/viml/parser/expressions.c +++ b/src/nvim/viml/parser/expressions.c @@ -1642,8 +1642,9 @@ viml_pexpr_parse_figure_brace_closing_error: lambda_node = NULL; } else { // Only first branch is valid. - is_invalid = true; ADD_VALUE_IF_MISSING(_("E15: Unexpected arrow: %.*s")); + ERROR_FROM_TOKEN_AND_MSG( + cur_token, _("E15: Arrow outside of lambda: %.*s")); NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeArrow); viml_pexpr_handle_bop(&ast_stack, cur_node, &want_node); } -- cgit From 6168e1127c1c80a3810854649b0776146545043b Mon Sep 17 00:00:00 2001 From: ZyX Date: Mon, 2 Oct 2017 02:41:55 +0300 Subject: viml/parser/expressions: Add support for comparison operators --- src/nvim/viml/parser/expressions.c | 131 ++++++++++++++++++++++++++----------- src/nvim/viml/parser/expressions.h | 45 ++++++++----- 2 files changed, 122 insertions(+), 54 deletions(-) (limited to 'src/nvim/viml/parser') diff --git a/src/nvim/viml/parser/expressions.c b/src/nvim/viml/parser/expressions.c index 982465055e..8e6f991e03 100644 --- a/src/nvim/viml/parser/expressions.c +++ b/src/nvim/viml/parser/expressions.c @@ -102,7 +102,7 @@ LexExprToken viml_pexpr_next_token(ParserState *const pstate, const int flags) if (ret.len < pline.size \ && strchr("?#", pline.data[ret.len]) != NULL) { \ ret.data.cmp.ccs = \ - (CaseCompareStrategy)pline.data[ret.len]; \ + (ExprCaseCompareStrategy)pline.data[ret.len]; \ ret.len++; \ } else { \ ret.data.cmp.ccs = kCCStrategyUseOption; \ @@ -240,7 +240,7 @@ LexExprToken viml_pexpr_next_token(ParserState 
*const pstate, const int flags) && ((ret.len == 2 && memcmp(pline.data, "is", 2) == 0) || (ret.len == 5 && memcmp(pline.data, "isnot", 5) == 0))) { ret.type = kExprLexComparison; - ret.data.cmp.type = kExprLexCmpIdentical; + ret.data.cmp.type = kExprCmpIdentical; ret.data.cmp.inv = (ret.len == 5); GET_CCS(ret, pline); // Scope: `s:`, etc. @@ -381,10 +381,10 @@ viml_pexpr_next_token_invalid_comparison: ret.type = kExprLexComparison; ret.data.cmp.inv = (schar == '!'); if (pline.data[1] == '=') { - ret.data.cmp.type = kExprLexCmpEqual; + ret.data.cmp.type = kExprCmpEqual; ret.len++; } else if (pline.data[1] == '~') { - ret.data.cmp.type = kExprLexCmpMatches; + ret.data.cmp.type = kExprCmpMatches; ret.len++; } else { goto viml_pexpr_next_token_invalid_comparison; @@ -404,8 +404,8 @@ viml_pexpr_next_token_invalid_comparison: GET_CCS(ret, pline); ret.data.cmp.inv = (schar == '<'); ret.data.cmp.type = ((ret.data.cmp.inv ^ haseqsign) - ? kExprLexCmpGreaterOrEqual - : kExprLexCmpGreater); + ? kExprCmpGreaterOrEqual + : kExprCmpGreater); break; } @@ -503,11 +503,11 @@ static const char *const eltkn_type_tab[] = { }; static const char *const eltkn_cmp_type_tab[] = { - [kExprLexCmpEqual] = "Equal", - [kExprLexCmpMatches] = "Matches", - [kExprLexCmpGreater] = "Greater", - [kExprLexCmpGreaterOrEqual] = "GreaterOrEqual", - [kExprLexCmpIdentical] = "Identical", + [kExprCmpEqual] = "Equal", + [kExprCmpMatches] = "Matches", + [kExprCmpGreater] = "Greater", + [kExprCmpGreaterOrEqual] = "GreaterOrEqual", + [kExprCmpIdentical] = "Identical", }; static const char *const ccs_tab[] = { @@ -770,7 +770,8 @@ static inline void viml_pexpr_debug_print_token( // NVimOperator -> Operator // NVimUnaryOperator -> NVimOperator // NVimBinaryOperator -> NVimOperator -// NVimComparisonOperator -> NVimOperator +// NVimComparisonOperator -> NVimBinaryOperator +// NVimComparisonOperatorModifier -> NVimComparisonOperator // NVimTernary -> NVimOperator // NVimTernaryColon -> NVimTernary // @@ -805,6 +806,8 
@@ static inline void viml_pexpr_debug_print_token( // NVimInvalidIdentifier -> NVimInvalidValue // NVimInvalidIdentifierScope -> NVimInvalidValue // NVimInvalidIdentifierScopeDelimiter -> NVimInvalidValue +// NVimInvalidComparisonOperator -> NVimInvalidOperator +// NVimInvalidComparisonOperatorModifier -> NVimInvalidComparisonOperator // // NVimUnaryPlus -> NVimUnaryOperator // NVimBinaryPlus -> NVimBinaryOperator @@ -849,6 +852,8 @@ static const ExprOpLvl node_type_to_op_lvl[] = { [kExprNodeTernaryValue] = kEOpLvlTernaryValue, + [kExprNodeComparison] = kEOpLvlComparison, + [kExprNodeBinaryPlus] = kEOpLvlAddition, [kExprNodeUnaryPlus] = kEOpLvlUnary, @@ -892,6 +897,8 @@ static const ExprOpAssociativity node_type_to_op_ass[] = { [kExprNodeTernaryValue] = kEOpAssRight, + [kExprNodeComparison] = kEOpAssRight, + [kExprNodeBinaryPlus] = kEOpAssLeft, [kExprNodeUnaryPlus] = kEOpAssNo, @@ -935,11 +942,23 @@ static inline ExprOpAssociativity node_ass(const ExprASTNode node) /// Handle binary operator /// /// This function is responsible for handling priority levels as well. -static void viml_pexpr_handle_bop(ExprASTStack *const ast_stack, +/// +/// @param[in] pstate Parser state, used for error reporting. +/// @param ast_stack AST stack. May be popped of some values and will +/// definitely receive new ones. +/// @param bop_node New node to handle. +/// @param[out] want_node_p New value of want_node. +/// @param[out] ast_err Location where error is saved, if any. +/// +/// @return True if no errors occurred, false otherwise. 
+static bool viml_pexpr_handle_bop(const ParserState *const pstate, + ExprASTStack *const ast_stack, ExprASTNode *const bop_node, - ExprASTWantedNode *const want_node_p) + ExprASTWantedNode *const want_node_p, + ExprASTError *const ast_err) FUNC_ATTR_NONNULL_ALL { + bool ret = true; ExprASTNode **top_node_p = NULL; ExprASTNode *top_node; ExprOpLvl top_node_lvl; @@ -977,7 +996,6 @@ static void viml_pexpr_handle_bop(ExprASTStack *const ast_stack, break; } } while (kv_size(*ast_stack)); - // FIXME: Handle no associativity if (top_node_ass == kEOpAssLeft || top_node_lvl != bop_node_lvl) { // outer(op(x,y)) -> outer(new_op(op(x,y),*)) // @@ -1008,10 +1026,18 @@ static void viml_pexpr_handle_bop(ExprASTStack *const ast_stack, kvi_push(*ast_stack, top_node_p); kvi_push(*ast_stack, &top_node->children->next); kvi_push(*ast_stack, &bop_node->children->next); + // TODO(ZyX-I): Make this not error, but treat like Python does + if (bop_node->type == kExprNodeComparison) { + east_set_error(pstate, ast_err, + _("E15: Operator is not associative: %.*s"), + bop_node->start); + ret = false; + } } *want_node_p = (*want_node_p == kENodeArgumentSeparator ? kENodeArgument : kENodeValue); + return ret; } /// ParserPosition literal based on ParserPosition pos with columns shifted @@ -1074,6 +1100,13 @@ static inline ParserPosition shifted_pos(const ParserPosition pos, #define MAY_HAVE_NEXT_EXPR \ (kv_size(ast_stack) == 1) +/// Add operator node +/// +/// @param[in] cur_node Node to add. 
+#define ADD_OP_NODE(cur_node) \ + is_invalid |= !viml_pexpr_handle_bop(pstate, &ast_stack, cur_node, \ + &want_node, &ast.err) + /// Record missing operator: for things like /// /// :echo @a @a @@ -1094,7 +1127,7 @@ static inline ParserPosition shifted_pos(const ParserPosition pos, ERROR_FROM_TOKEN_AND_MSG(cur_token, _("E15: Missing operator: %.*s")); \ NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeOpMissing); \ cur_node->len = 0; \ - viml_pexpr_handle_bop(&ast_stack, cur_node, &want_node); \ + ADD_OP_NODE(cur_node); \ goto viml_pexpr_parse_process_token; \ } \ } while (0) @@ -1120,34 +1153,33 @@ static inline ParserPosition shifted_pos(const ParserPosition pos, /// @param[in] msg Error message, assumed to be already translated and /// containing a single %token "%.*s". /// @param[in] start Position at which error occurred. -static inline void east_set_error(ExprAST *const ret_ast, - const ParserState *const pstate, +static inline void east_set_error(const ParserState *const pstate, + ExprASTError *const ret_ast_err, const char *const msg, const ParserPosition start) FUNC_ATTR_NONNULL_ALL FUNC_ATTR_ALWAYS_INLINE { - if (!ret_ast->correct) { + if (ret_ast_err->msg != NULL) { return; } const ParserLine pline = pstate->reader.lines.items[start.line]; - ret_ast->correct = false; - ret_ast->err.msg = msg; - ret_ast->err.arg_len = (int)(pline.size - start.col); - ret_ast->err.arg = pline.data + start.col; + ret_ast_err->msg = msg; + ret_ast_err->arg_len = (int)(pline.size - start.col); + ret_ast_err->arg = pline.data + start.col; } /// Set error from the given token and given message #define ERROR_FROM_TOKEN_AND_MSG(cur_token, msg) \ do { \ is_invalid = true; \ - east_set_error(&ast, pstate, msg, cur_token.start); \ + east_set_error(pstate, &ast.err, msg, cur_token.start); \ } while (0) /// Like #ERROR_FROM_TOKEN_AND_MSG, but gets position from a node #define ERROR_FROM_NODE_AND_MSG(node, msg) \ do { \ is_invalid = true; \ - east_set_error(&ast, pstate, msg, node->start); \ 
+ east_set_error(pstate, &ast.err, msg, node->start); \ } while (0) /// Set error from the given kExprLexInvalid token @@ -1231,7 +1263,6 @@ ExprAST viml_pexpr_parse(ParserState *const pstate, const int flags) FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL { ExprAST ast = { - .correct = true, .err = { .msg = NULL, .arg_len = 0, @@ -1359,12 +1390,38 @@ viml_pexpr_parse_process_token: HL_CUR_TOKEN(UnaryPlus); } else { NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeBinaryPlus); - viml_pexpr_handle_bop(&ast_stack, cur_node, &want_node); + ADD_OP_NODE(cur_node); HL_CUR_TOKEN(BinaryPlus); } want_node = kENodeValue; break; } + case kExprLexComparison: { + ADD_VALUE_IF_MISSING( + _("E15: Expected value, got comparison operator: %.*s")); + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeComparison); + if (cur_token.type == kExprLexInvalid) { + cur_node->data.cmp.ccs = kCCStrategyUseOption; + cur_node->data.cmp.type = kExprCmpEqual; + cur_node->data.cmp.inv = false; + } else { + cur_node->data.cmp.ccs = cur_token.data.cmp.ccs; + cur_node->data.cmp.type = cur_token.data.cmp.type; + cur_node->data.cmp.inv = cur_token.data.cmp.inv; + } + ADD_OP_NODE(cur_node); + if (cur_token.data.cmp.ccs != kCCStrategyUseOption) { + viml_parser_highlight(pstate, cur_token.start, cur_token.len - 1, + HL(ComparisonOperator)); + viml_parser_highlight( + pstate, shifted_pos(cur_token.start, cur_token.len - 1), 1, + HL(ComparisonOperatorModifier)); + } else { + HL_CUR_TOKEN(ComparisonOperator); + } + want_node = kENodeValue; + break; + } case kExprLexComma: { assert(want_node != kENodeArgument); if (want_node == kENodeValue) { @@ -1415,7 +1472,7 @@ viml_pexpr_parse_invalid_comma: } } NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeComma); - viml_pexpr_handle_bop(&ast_stack, cur_node, &want_node); + ADD_OP_NODE(cur_node); HL_CUR_TOKEN(Comma); break; } @@ -1474,7 +1531,7 @@ viml_pexpr_parse_invalid_colon: HL_CUR_TOKEN(TernaryColon); } else { NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeColon); - 
viml_pexpr_handle_bop(&ast_stack, cur_node, &want_node); + ADD_OP_NODE(cur_node); HL_CUR_TOKEN(Colon); } want_node = kENodeValue; @@ -1646,7 +1703,7 @@ viml_pexpr_parse_figure_brace_closing_error: ERROR_FROM_TOKEN_AND_MSG( cur_token, _("E15: Arrow outside of lambda: %.*s")); NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeArrow); - viml_pexpr_handle_bop(&ast_stack, cur_node, &want_node); + ADD_OP_NODE(cur_node); } want_node = kENodeValue; HL_CUR_TOKEN(Arrow); @@ -1775,7 +1832,7 @@ viml_pexpr_parse_no_paren_closing_error: {} } } NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeCall); - viml_pexpr_handle_bop(&ast_stack, cur_node, &want_node); + ADD_OP_NODE(cur_node); HL_CUR_TOKEN(CallingParenthesis); } else { // Currently it is impossible to reach this. @@ -1788,7 +1845,7 @@ viml_pexpr_parse_no_paren_closing_error: {} case kExprLexQuestion: { ADD_VALUE_IF_MISSING(_("E15: Expected value, got question mark: %.*s")); NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeTernary); - viml_pexpr_handle_bop(&ast_stack, cur_node, &want_node); + ADD_OP_NODE(cur_node); HL_CUR_TOKEN(Ternary); ExprASTNode *ter_val_node; NEW_NODE_WITH_CUR_POS(ter_val_node, kExprNodeTernaryValue); @@ -1808,7 +1865,7 @@ viml_pexpr_parse_cycle_end: } while (true); viml_pexpr_parse_end: if (want_node == kENodeValue) { - east_set_error(&ast, pstate, _("E15: Expected value, got EOC: %.*s"), + east_set_error(pstate, &ast.err, _("E15: Expected value, got EOC: %.*s"), pstate->pos); } else if (kv_size(ast_stack) != 1) { // Something may be wrong, check whether it really is. @@ -1819,7 +1876,7 @@ viml_pexpr_parse_end: // Topmost stack item must be a *finished* value, so it must not be // analyzed. E.g. it may contain an already finished nested expression. kv_drop(ast_stack, 1); - while (ast.correct && kv_size(ast_stack)) { + while (ast.err.msg == NULL && kv_size(ast_stack)) { const ExprASTNode *const cur_node = (*kv_pop(ast_stack)); // This should only happen when want_node == kENodeValue. 
assert(cur_node != NULL); @@ -1832,14 +1889,14 @@ viml_pexpr_parse_end: } case kExprNodeCall: { east_set_error( - &ast, pstate, + pstate, &ast.err, _("E116: Missing closing parenthesis for function call: %.*s"), cur_node->start); break; } case kExprNodeNested: { east_set_error( - &ast, pstate, + pstate, &ast.err, _("E110: Missing closing parenthesis for nested expression" ": %.*s"), cur_node->start); @@ -1855,7 +1912,7 @@ viml_pexpr_parse_end: if (!cur_node->data.ter.got_colon) { // Actually Vim throws E109 in more cases. east_set_error( - &ast, pstate, _("E109: Missing ':' after '?': %.*s"), + pstate, &ast.err, _("E109: Missing ':' after '?': %.*s"), cur_node->start); } break; diff --git a/src/nvim/viml/parser/expressions.h b/src/nvim/viml/parser/expressions.h index cf0907850a..8ca3ceacb9 100644 --- a/src/nvim/viml/parser/expressions.h +++ b/src/nvim/viml/parser/expressions.h @@ -16,7 +16,7 @@ typedef enum { kCCStrategyUseOption = 0, // 0 for xcalloc kCCStrategyMatchCase = '#', kCCStrategyIgnoreCase = '?', -} CaseCompareStrategy; +} ExprCaseCompareStrategy; /// Lexer token type typedef enum { @@ -52,6 +52,14 @@ typedef enum { kExprLexArrow, ///< Arrow, like from lambda expressions. } LexExprTokenType; +typedef enum { + kExprCmpEqual, ///< Equality, unequality. + kExprCmpMatches, ///< Matches regex, not matches regex. + kExprCmpGreater, ///< `>` or `<=` + kExprCmpGreaterOrEqual, ///< `>=` or `<`. + kExprCmpIdentical, ///< `is` or `isnot` +} ExprComparisonType; + /// Lexer token typedef struct { ParserPosition start; @@ -59,14 +67,8 @@ typedef struct { LexExprTokenType type; union { struct { - enum { - kExprLexCmpEqual, ///< Equality, unequality. - kExprLexCmpMatches, ///< Matches regex, not matches regex. - kExprLexCmpGreater, ///< `>` or `<=` - kExprLexCmpGreaterOrEqual, ///< `>=` or `<`. - kExprLexCmpIdentical, ///< `is` or `isnot` - } type; ///< Comparison type. - CaseCompareStrategy ccs; ///< Case comparison strategy. 
+ ExprComparisonType type; ///< Comparison type. + ExprCaseCompareStrategy ccs; ///< Case comparison strategy. bool inv; ///< True if comparison is to be inverted. } cmp; ///< For kExprLexComparison. @@ -171,6 +173,7 @@ typedef enum { kExprNodeComma = ',', ///< Comma “operator”. kExprNodeColon = ':', ///< Colon “operator”. kExprNodeArrow = '>', ///< Arrow “operator”. + kExprNodeComparison = '=', ///< Various comparison operators. } ExprASTNodeType; typedef struct expr_ast_node ExprASTNode; @@ -214,6 +217,11 @@ struct expr_ast_node { struct { bool got_colon; ///< True if colon was seen. } ter; ///< For kExprNodeTernaryValue. + struct { + ExprComparisonType type; ///< Comparison type. + ExprCaseCompareStrategy ccs; ///< Case comparison strategy. + bool inv; ///< True if comparison is to be inverted. + } cmp; ///< For kExprNodeComparison. } data; }; @@ -235,19 +243,22 @@ enum { // viml_expressions_parser.c. } ExprParserFlags; +/// AST error definition +typedef struct { + /// Error message. Must contain a single printf format atom: %.*s. + const char *msg; + /// Error message argument: points to the location of the error. + const char *arg; + /// Message argument length: length till the end of string. + int arg_len; +} ExprASTError; + /// Structure representing complety AST for one expression typedef struct { - /// True if represented AST is correct and can be executed. Incorrect ones may - /// still be used for completion, or in linters. - bool correct; /// When AST is not correct this message will be printed. /// /// Uses `emsgf(msg, arg_len, arg);`, `msg` is assumed to contain only `%.*s`. - struct { - const char *msg; - int arg_len; - const char *arg; - } err; + ExprASTError err; /// Root node of the AST. 
ExprASTNode *root; } ExprAST; -- cgit From 0bc4e2237960712426da3774c1430f5874c49aea Mon Sep 17 00:00:00 2001 From: ZyX Date: Tue, 3 Oct 2017 00:39:40 +0300 Subject: viml/parser/expressions: Forbid dot or alpha characters after a float This is basically what Vim already does, in addition to forbidding floats should there be a concat immediately before it. --- src/nvim/viml/parser/expressions.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src/nvim/viml/parser') diff --git a/src/nvim/viml/parser/expressions.c b/src/nvim/viml/parser/expressions.c index 8e6f991e03..8c95d1db14 100644 --- a/src/nvim/viml/parser/expressions.c +++ b/src/nvim/viml/parser/expressions.c @@ -186,6 +186,7 @@ LexExprToken viml_pexpr_next_token(ParserState *const pstate, const int flags) ret.data.num.is_float = false; CHARREG(kExprLexNumber, ascii_isdigit); if (flags & kELFlagAllowFloat) { + const LexExprToken non_float_ret = ret; if (pline.size > ret.len + 1 && pline.data[ret.len] == '.' && ascii_isdigit(pline.data[ret.len + 1])) { @@ -207,6 +208,11 @@ LexExprToken viml_pexpr_next_token(ParserState *const pstate, const int flags) CHARREG(kExprLexNumber, ascii_isdigit); } } + if (pline.size > ret.len + && (pline.data[ret.len] == '.' 
+ || ASCII_ISALPHA(pline.data[ret.len]))) { + ret = non_float_ret; + } } break; } -- cgit From 163792e9b9854fe046ada3233dec0fd0f6c55737 Mon Sep 17 00:00:00 2001 From: ZyX Date: Fri, 6 Oct 2017 01:19:43 +0300 Subject: viml/parser/expressions: Make lexer parse numbers, support non-decimal --- src/nvim/viml/parser/expressions.c | 146 ++++++++++++++++++++++++++++++++++--- src/nvim/viml/parser/expressions.h | 6 ++ 2 files changed, 141 insertions(+), 11 deletions(-) (limited to 'src/nvim/viml/parser') diff --git a/src/nvim/viml/parser/expressions.c b/src/nvim/viml/parser/expressions.c index 8c95d1db14..5d892fb8f8 100644 --- a/src/nvim/viml/parser/expressions.c +++ b/src/nvim/viml/parser/expressions.c @@ -15,10 +15,13 @@ #include "nvim/ascii.h" #include "nvim/assert.h" #include "nvim/lib/kvec.h" +#include "nvim/eval/typval.h" #include "nvim/viml/parser/expressions.h" #include "nvim/viml/parser/parser.h" +#define vim_str2nr(s, ...) vim_str2nr((const char_u *)(s), __VA_ARGS__) + typedef kvec_withinit_t(ExprASTNode **, 16) ExprASTStack; /// Which nodes may be wanted @@ -72,6 +75,43 @@ typedef enum { /// Character used as a separator in autoload function/variable names. #define AUTOLOAD_CHAR '#' +/// Scale number by a given factor +/// +/// Used to apply exponent to a number. Idea taken from uClibc. +/// +/// @param[in] num Number to scale. Does not bother doing anything if it is +/// zero. +/// @param[in] base Base, should be 10 since non-decimal floating-point +/// numbers are not supported. +/// @param[in] exponent Exponent to scale by. +/// @param[in] exponent_negative True if exponent is negative. 
+static inline float_T scale_number(const float_T num, + const uint8_t base, + const uvarnumber_T exponent, + const bool exponent_negative) + FUNC_ATTR_ALWAYS_INLINE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_CONST +{ + if (num == 0 || exponent == 0) { + return num; + } + assert(base); + uvarnumber_T exp = exponent; + float_T p_base = (float_T)base; + float_T ret = num; + while (exp) { + if (exp & 1) { + if (exponent_negative) { + ret /= p_base; + } else { + ret *= p_base; + } + } + exp >>= 1; + p_base *= p_base; + } + return ret; +} + /// Get next token for the VimL expression input /// /// @param pstate Parser state. @@ -184,6 +224,11 @@ LexExprToken viml_pexpr_next_token(ParserState *const pstate, const int flags) case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': { ret.data.num.is_float = false; + ret.data.num.base = 10; + size_t frac_start = 0; + size_t exp_start = 0; + size_t frac_end = 0; + bool exp_negative = false; CHARREG(kExprLexNumber, ascii_isdigit); if (flags & kELFlagAllowFloat) { const LexExprToken non_float_ret = ret; @@ -191,8 +236,18 @@ LexExprToken viml_pexpr_next_token(ParserState *const pstate, const int flags) && pline.data[ret.len] == '.' && ascii_isdigit(pline.data[ret.len + 1])) { ret.len++; + frac_start = ret.len; + frac_end = ret.len; ret.data.num.is_float = true; - CHARREG(kExprLexNumber, ascii_isdigit); + for (; ret.len < pline.size && ascii_isdigit(pline.data[ret.len]) + ; ret.len++) { + // A small optimization: trailing zeroes in fractional part do not + // add anything to significand, so it is useless to include them in + // frac_end. 
+ if (pline.data[ret.len] != '0') { + frac_end = ret.len + 1; + } + } if (pline.size > ret.len + 1 && (pline.data[ret.len] == 'e' || pline.data[ret.len] == 'E') @@ -202,9 +257,11 @@ LexExprToken viml_pexpr_next_token(ParserState *const pstate, const int flags) && ascii_isdigit(pline.data[ret.len + 2])) || ascii_isdigit(pline.data[ret.len + 1]))) { ret.len++; - if (pline.data[ret.len] == '+' || pline.data[ret.len] == '-') { + if (pline.data[ret.len] == '+' + || (exp_negative = (pline.data[ret.len] == '-'))) { ret.len++; } + exp_start = ret.len; CHARREG(kExprLexNumber, ascii_isdigit); } } @@ -214,6 +271,58 @@ LexExprToken viml_pexpr_next_token(ParserState *const pstate, const int flags) ret = non_float_ret; } } + // TODO(ZyX-I): detect overflows + if (ret.data.num.is_float) { + // Vim used to use string2float here which in turn uses strtod(). There + // are two problems with this approach: + // 1. strtod() is locale-dependent. Not sure how it is worked around so + // that I do not see relevant bugs, but it still does not look like + // a good idea. + // 2. strtod() does not accept length argument. + // + // The below variant of parsing floats was recognized as acceptable + // because it is basically how uClibc does the thing: it generates + // a number ignoring decimal point (but recording its position), then + // uses recorded position to scale number down when processing exponent. 
+ float_T significand_part = 0; + uvarnumber_T exp_part = 0; + const size_t frac_size = (size_t)(frac_end - frac_start); + for (size_t i = 0; i < frac_end; i++) { + if (i == frac_start - 1) { + continue; + } + significand_part = significand_part * 10 + (pline.data[i] - '0'); + } + if (exp_start) { + vim_str2nr(pline.data + exp_start, NULL, NULL, 0, NULL, &exp_part, + (int)(ret.len - exp_start)); + } + if (exp_negative) { + exp_part += frac_size; + } else { + if (exp_part < frac_size) { + exp_negative = true; + exp_part = frac_size - exp_part; + } else { + exp_part -= frac_size; + } + } + ret.data.num.val.floating = scale_number(significand_part, 10, exp_part, + exp_negative); + } else { + int len; + int prep; + vim_str2nr(pline.data, &prep, &len, STR2NR_ALL, NULL, + &ret.data.num.val.integer, (int)pline.size); + ret.len = (size_t)len; + const uint8_t bases[] = { + [0] = 10, + ['0'] = 8, + ['x'] = 16, ['X'] = 16, + ['b'] = 2, ['B'] = 2, + }; + ret.data.num.base = bases[prep]; + } break; } @@ -474,7 +583,6 @@ viml_pexpr_next_token_adv_return: return ret; } -#ifdef UNIT_TESTING static const char *const eltkn_type_tab[] = { [kExprLexInvalid] = "Invalid", [kExprLexMissing] = "Missing", @@ -617,7 +725,12 @@ const char *viml_pexpr_repr_token(const ParserState *const pstate, (int)token.data.opt.len, token.data.opt.name) TKNARGS(kExprLexPlainIdentifier, "(scope=%s,autoload=%i)", intchar2str(token.data.var.scope), (int)token.data.var.autoload) - TKNARGS(kExprLexNumber, "(is_float=%i)", (int)token.data.num.is_float) + TKNARGS(kExprLexNumber, "(is_float=%i,base=%i,val=%lg)", + (int)token.data.num.is_float, + (int)token.data.num.base, + (double)(token.data.num.is_float + ? token.data.num.val.floating + : token.data.num.val.integer)) TKNARGS(kExprLexInvalid, "(msg=%s)", token.data.err.msg) default: { // No additional arguments. 
@@ -642,7 +755,6 @@ viml_pexpr_repr_token_end: } return ret; } -#endif #ifdef UNIT_TESTING #include @@ -776,8 +888,10 @@ static inline void viml_pexpr_debug_print_token( // NVimOperator -> Operator // NVimUnaryOperator -> NVimOperator // NVimBinaryOperator -> NVimOperator +// // NVimComparisonOperator -> NVimBinaryOperator // NVimComparisonOperatorModifier -> NVimComparisonOperator +// // NVimTernary -> NVimOperator // NVimTernaryColon -> NVimTernary // @@ -795,8 +909,21 @@ static inline void viml_pexpr_debug_print_token( // NVimIdentifierScope -> NVimIdentifier // NVimIdentifierScopeDelimiter -> NVimIdentifier // +// NVimIdentifierKey -> Identifier +// // NVimFigureBrace -> NVimInternalError // +// NVimUnaryPlus -> NVimUnaryOperator +// NVimBinaryPlus -> NVimBinaryOperator +// NVimConcatOrSubscript -> NVimBinaryOperator +// +// NVimRegister -> SpecialChar +// NVimNumber -> Number +// NVimFloat -> NVimNumber +// +// NVimNestingParenthesis -> NVimParenthesis +// NVimCallingParenthesis -> NVimParenthesis +// // NVimInvalidComma -> NVimInvalidDelimiter // NVimInvalidSpacing -> NVimInvalid // NVimInvalidTernary -> NVimInvalidOperator @@ -814,12 +941,9 @@ static inline void viml_pexpr_debug_print_token( // NVimInvalidIdentifierScopeDelimiter -> NVimInvalidValue // NVimInvalidComparisonOperator -> NVimInvalidOperator // NVimInvalidComparisonOperatorModifier -> NVimInvalidComparisonOperator -// -// NVimUnaryPlus -> NVimUnaryOperator -// NVimBinaryPlus -> NVimBinaryOperator -// NVimRegister -> SpecialChar -// NVimNestingParenthesis -> NVimParenthesis -// NVimCallingParenthesis -> NVimParenthesis +// NVimInvalidNumber -> NVimInvalidValue +// NVimInvalidFloat -> NVimInvalidValue +// NVimInvalidIdentifierKey -> NVimInvalidIdentifier /// Allocate a new node and set some of the values /// diff --git a/src/nvim/viml/parser/expressions.h b/src/nvim/viml/parser/expressions.h index 8ca3ceacb9..29903490bb 100644 --- a/src/nvim/viml/parser/expressions.h +++ 
b/src/nvim/viml/parser/expressions.h @@ -7,6 +7,7 @@ #include "nvim/types.h" #include "nvim/viml/parser/parser.h" +#include "nvim/eval/typval.h" // Defines whether to ignore case: // == kCCStrategyUseOption @@ -113,6 +114,11 @@ typedef struct { } err; ///< For kExprLexInvalid struct { + union { + float_T floating; + uvarnumber_T integer; + } val; ///< Number value. + uint8_t base; ///< Base: 2, 8, 10 or 16. bool is_float; ///< True if number is a floating-point. } num; ///< For kExprLexNumber } data; ///< Additional data, if needed. -- cgit From 21a5ce033c5a853bed3204ea9f0f7a3cfc1d164f Mon Sep 17 00:00:00 2001 From: ZyX Date: Tue, 3 Oct 2017 01:30:02 +0300 Subject: viml/parser/expressions: Add support for the dot operator and numbers --- src/nvim/viml/parser/expressions.c | 108 ++++++++++++++++++++++++++++++++++--- src/nvim/viml/parser/expressions.h | 23 +++++++- 2 files changed, 122 insertions(+), 9 deletions(-) (limited to 'src/nvim/viml/parser') diff --git a/src/nvim/viml/parser/expressions.c b/src/nvim/viml/parser/expressions.c index 5d892fb8f8..4babf4312c 100644 --- a/src/nvim/viml/parser/expressions.c +++ b/src/nvim/viml/parser/expressions.c @@ -915,7 +915,8 @@ static inline void viml_pexpr_debug_print_token( // // NVimUnaryPlus -> NVimUnaryOperator // NVimBinaryPlus -> NVimBinaryOperator -// NVimConcatOrSubscript -> NVimBinaryOperator +// NVimConcat -> NVimBinaryOperator +// NVimConcatOrSubscript -> NVimConcat // // NVimRegister -> SpecialChar // NVimNumber -> Number @@ -971,6 +972,7 @@ static const ExprOpLvl node_type_to_op_lvl[] = { [kExprNodeUnknownFigure] = kEOpLvlParens, [kExprNodeLambda] = kEOpLvlParens, [kExprNodeDictLiteral] = kEOpLvlParens, + [kExprNodeListLiteral] = kEOpLvlParens, [kExprNodeArrow] = kEOpLvlArrow, @@ -985,17 +987,21 @@ static const ExprOpLvl node_type_to_op_lvl[] = { [kExprNodeComparison] = kEOpLvlComparison, [kExprNodeBinaryPlus] = kEOpLvlAddition, + [kExprNodeConcat] = kEOpLvlAddition, [kExprNodeUnaryPlus] = kEOpLvlUnary, + 
[kExprNodeConcatOrSubscript] = kEOpLvlSubscript, [kExprNodeSubscript] = kEOpLvlSubscript, [kExprNodeCurlyBracesIdentifier] = kEOpLvlComplexIdentifier, [kExprNodeComplexIdentifier] = kEOpLvlValue, [kExprNodePlainIdentifier] = kEOpLvlValue, + [kExprNodePlainKey] = kEOpLvlValue, [kExprNodeRegister] = kEOpLvlValue, - [kExprNodeListLiteral] = kEOpLvlValue, + [kExprNodeInteger] = kEOpLvlValue, + [kExprNodeFloat] = kEOpLvlValue, }; static const ExprOpAssociativity node_type_to_op_ass[] = { @@ -1008,6 +1014,7 @@ static const ExprOpAssociativity node_type_to_op_ass[] = { [kExprNodeUnknownFigure] = kEOpAssLeft, [kExprNodeLambda] = kEOpAssNo, [kExprNodeDictLiteral] = kEOpAssNo, + [kExprNodeListLiteral] = kEOpAssNo, // Does not really matter. [kExprNodeArrow] = kEOpAssNo, @@ -1030,17 +1037,21 @@ static const ExprOpAssociativity node_type_to_op_ass[] = { [kExprNodeComparison] = kEOpAssRight, [kExprNodeBinaryPlus] = kEOpAssLeft, + [kExprNodeConcat] = kEOpAssLeft, [kExprNodeUnaryPlus] = kEOpAssNo, + [kExprNodeConcatOrSubscript] = kEOpAssLeft, [kExprNodeSubscript] = kEOpAssLeft, [kExprNodeCurlyBracesIdentifier] = kEOpAssLeft, [kExprNodeComplexIdentifier] = kEOpAssLeft, [kExprNodePlainIdentifier] = kEOpAssNo, + [kExprNodePlainKey] = kEOpAssNo, [kExprNodeRegister] = kEOpAssNo, - [kExprNodeListLiteral] = kEOpAssNo, + [kExprNodeInteger] = kEOpAssNo, + [kExprNodeFloat] = kEOpAssNo, }; /// Get AST node priority level @@ -1420,10 +1431,20 @@ ExprAST viml_pexpr_parse(ParserState *const pstate, const int flags) [kENodeArgument] = kELFlagIsNotCmp, [kENodeArgumentSeparator] = kELFlagForbidScope, }; - // FIXME Determine when (not) to allow floating-point numbers. + const bool is_concat_or_subscript = ( + want_node == kENodeValue + && kv_size(ast_stack) > 1 + && (*kv_Z(ast_stack, 1))->type == kExprNodeConcatOrSubscript); const int lexer_additional_flags = ( kELFlagPeek - | ((flags & kExprFlagsDisallowEOC) ? kELFlagForbidEOC : 0)); + | ((flags & kExprFlagsDisallowEOC) ? 
kELFlagForbidEOC : 0) + | ((want_node == kENodeValue + && (kv_size(ast_stack) == 1 + || ((*kv_Z(ast_stack, 1))->type != kExprNodeConcat + && ((*kv_Z(ast_stack, 1))->type + != kExprNodeConcatOrSubscript)))) + ? kELFlagAllowFloat + : 0)); LexExprToken cur_token = viml_pexpr_next_token( pstate, want_node_to_lexer_flags[want_node] | lexer_additional_flags); if (cur_token.type == kExprLexEOC) { @@ -1456,11 +1477,42 @@ viml_pexpr_parse_process_token: ExprASTNode *cur_node = NULL; assert((want_node == kENodeValue || want_node == kENodeArgument) == (*top_node_p == NULL)); + // Note: in Vim whether expression "cond?d.a:2" is valid depends both on + // "cond" and whether "d" is a dictionary: expression is valid if condition + // is true and "d" is a dictionary (with "a" key or it will complain about + // missing one, but this is not relevant); if any of the requirements is + // broken then this thing is parsed as "d . a:2" yielding missing colon + // error. This parser does not allow such ambiguity, especially because it + // simply can’t: whether "d" is a dictionary is not known at the parsing + // time. + // + // Here example will always contain a concat with "a:2" sucking colon, + // making expression invalid both because there is no longer a spare colon + // for ternary and because concatenating dictionary with anything is not + // valid. There are more cases when this will make a difference though. + const bool node_is_key = ( + is_concat_or_subscript + && (cur_token.type == kExprLexPlainIdentifier + ? (!cur_token.data.var.autoload + && cur_token.data.var.scope == 0) + : (cur_token.type == kExprLexNumber)) + && prev_token.type != kExprLexSpacing); + if (is_concat_or_subscript && !node_is_key) { + // Note: in Vim "d. a" (this is the reason behind `prev_token.type != + // kExprLexSpacing` part of the condition) as well as any other "d.{expr}" + // where "{expr}" does not look like a key is invalid whenever "d" happens + // to be a dictionary. 
Since parser has no idea whether preceding + // expression is actually a dictionary it can’t outright reject anything, + // so it turns kExprNodeConcatOrSubscript into kExprNodeConcat instead, + // which will yield different errors than Vim does in a number of + // circumstances, and in any case runtime and not parse time errors. + (*kv_Z(ast_stack, 1))->type = kExprNodeConcat; + } if ((want_node == kENodeArgumentSeparator && tok_type != kExprLexComma && tok_type != kExprLexArrow) || (want_node == kENodeArgument - && !(tok_type == kExprLexPlainIdentifier + && !(cur_token.type == kExprLexPlainIdentifier && cur_token.data.var.scope == 0 && !cur_token.data.var.autoload) && tok_type != kExprLexArrow)) { @@ -1844,7 +1896,10 @@ viml_pexpr_parse_figure_brace_closing_error: want_node = (want_node == kENodeArgument ? kENodeArgumentSeparator : kENodeOperator); - NEW_NODE_WITH_CUR_POS(cur_node, kExprNodePlainIdentifier); + NEW_NODE_WITH_CUR_POS(cur_node, + (node_is_key + ? kExprNodePlainKey + : kExprNodePlainIdentifier)); cur_node->data.var.scope = cur_token.data.var.scope; const size_t scope_shift = (cur_token.data.var.scope == 0 ? 0 @@ -1854,6 +1909,7 @@ viml_pexpr_parse_figure_brace_closing_error: cur_node->data.var.ident_len = cur_token.len - scope_shift; *top_node_p = cur_node; if (scope_shift) { + assert(!node_is_key); viml_parser_highlight(pstate, cur_token.start, 1, HL(IdentifierScope)); viml_parser_highlight(pstate, shifted_pos(cur_token.start, 1), 1, @@ -1863,7 +1919,9 @@ viml_pexpr_parse_figure_brace_closing_error: viml_parser_highlight(pstate, shifted_pos(cur_token.start, scope_shift), cur_token.len - scope_shift, - HL(Identifier)); + (node_is_key + ?
HL(IdentifierKey) + : HL(Identifier))); } } else { if (cur_token.data.var.scope == 0) { @@ -1882,6 +1940,40 @@ viml_pexpr_parse_figure_brace_closing_error: } break; } + case kExprLexNumber: { + if (want_node != kENodeValue) { + OP_MISSING; + } + if (node_is_key) { + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodePlainKey); + cur_node->data.var.ident = pline.data + cur_token.start.col; + cur_node->data.var.ident_len = cur_token.len; + HL_CUR_TOKEN(IdentifierKey); + } else if (cur_token.data.num.is_float) { + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeFloat); + cur_node->data.flt.value = cur_token.data.num.val.floating; + HL_CUR_TOKEN(Float); + } else { + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeInteger); + cur_node->data.num.value = cur_token.data.num.val.integer; + HL_CUR_TOKEN(Number); + } + want_node = kENodeOperator; + *top_node_p = cur_node; + break; + } + case kExprLexDot: { + ADD_VALUE_IF_MISSING(_("E15: Unexpected dot: %.*s")); + if (prev_token.type == kExprLexSpacing) { + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeConcat); + HL_CUR_TOKEN(Concat); + } else { + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeConcatOrSubscript); + HL_CUR_TOKEN(ConcatOrSubscript); + } + ADD_OP_NODE(cur_node); + break; + } case kExprLexParenthesis: { if (cur_token.data.brc.closing) { if (want_node == kENodeValue) { diff --git a/src/nvim/viml/parser/expressions.h b/src/nvim/viml/parser/expressions.h index 29903490bb..0d496c87ba 100644 --- a/src/nvim/viml/parser/expressions.h +++ b/src/nvim/viml/parser/expressions.h @@ -166,6 +166,8 @@ typedef enum { /// Looks like "string", "g:Foo", etc: consists from a single /// kExprLexPlainIdentifier token. kExprNodePlainIdentifier = 'i', + /// Plain dictionary key, for use with kExprNodeConcatOrSubscript + kExprNodePlainKey = 'k', /// Complex identifier: variable/function name with curly braces kExprNodeComplexIdentifier = 'I', /// Figure brace expression which is not yet known @@ -180,6 +182,19 @@ typedef enum { kExprNodeColon = ':', ///< Colon “operator”. 
kExprNodeArrow = '>', ///< Arrow “operator”. kExprNodeComparison = '=', ///< Various comparison operators. + /// Concat operator + /// + /// To be only used in cases when it is known for sure it is not a subscript. + kExprNodeConcat = '.', + /// Concat or subscript operator + /// + /// For cases when it is not obvious whether expression is a concat or + /// a subscript. May only have either number or plain identifier as the second + /// child. To make it easier to avoid curly braces in place of + /// kExprNodePlainIdentifier node kExprNodePlainKey is used. + kExprNodeConcatOrSubscript = 'S', + kExprNodeInteger = '0', ///< Integral number. + kExprNodeFloat = '1', ///< Floating-point number. } ExprASTNodeType; typedef struct expr_ast_node ExprASTNode; @@ -219,7 +234,7 @@ struct expr_ast_node { /// Points to inside parser reader state. const char *ident; size_t ident_len; ///< Actual identifier length. - } var; ///< For kExprNodePlainIdentifier. + } var; ///< For kExprNodePlainIdentifier and kExprNodePlainKey. struct { bool got_colon; ///< True if colon was seen. } ter; ///< For kExprNodeTernaryValue. @@ -228,6 +243,12 @@ struct expr_ast_node { ExprCaseCompareStrategy ccs; ///< Case comparison strategy. bool inv; ///< True if comparison is to be inverted. } cmp; ///< For kExprNodeComparison. + struct { + uvarnumber_T value; + } num; ///< For kExprNodeInteger. + struct { + float_T value; + } flt; ///< For kExprNodeFloat. 
} data; }; -- cgit From e45e519495832e3d1d0fde1e32723d4140c5fc65 Mon Sep 17 00:00:00 2001 From: ZyX Date: Sun, 8 Oct 2017 01:19:58 +0300 Subject: viml/parser/expressions: Error out on multiple colons in a row --- src/nvim/viml/parser/expressions.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/nvim/viml/parser') diff --git a/src/nvim/viml/parser/expressions.c b/src/nvim/viml/parser/expressions.c index 4babf4312c..fffae8833d 100644 --- a/src/nvim/viml/parser/expressions.c +++ b/src/nvim/viml/parser/expressions.c @@ -1691,8 +1691,10 @@ viml_pexpr_parse_invalid_comma: SELECT_FIGURE_BRACE_TYPE(*eastnode_p, DictLiteral, Dict); break; } else if (eastnode_type == kExprNodeDictLiteral - || eastnode_type == kExprNodeComma) { + || eastnode_type == kExprNodeSubscript) { break; + } else if (eastnode_type == kExprNodeColon) { + goto viml_pexpr_parse_invalid_colon; } else if (eastnode_lvl >= kEOpLvlTernaryValue) { // Do nothing } else if (eastnode_lvl > kEOpLvlComma) { -- cgit From bd3a4166b25a64dbe406be09b3140955cf694477 Mon Sep 17 00:00:00 2001 From: ZyX Date: Sun, 8 Oct 2017 02:17:05 +0300 Subject: viml/parser/expressions: Add support for subscript and list literals --- src/nvim/viml/parser/expressions.c | 164 ++++++++++++++++++++++++++++++++----- 1 file changed, 142 insertions(+), 22 deletions(-) (limited to 'src/nvim/viml/parser') diff --git a/src/nvim/viml/parser/expressions.c b/src/nvim/viml/parser/expressions.c index fffae8833d..0613cc66d5 100644 --- a/src/nvim/viml/parser/expressions.c +++ b/src/nvim/viml/parser/expressions.c @@ -905,6 +905,10 @@ static inline void viml_pexpr_debug_print_token( // NVimDict -> Delimiter // NVimCurly -> Delimiter // +// NVimList -> Delimiter +// NVimSubscript -> Delimiter +// NVimSubscriptColon -> NVimSubscript +// // NVimIdentifier -> Identifier // NVimIdentifierScope -> NVimIdentifier // NVimIdentifierScopeDelimiter -> NVimIdentifier @@ -945,6 +949,9 @@ static inline void viml_pexpr_debug_print_token( // 
NVimInvalidNumber -> NVimInvalidValue // NVimInvalidFloat -> NVimInvalidValue // NVimInvalidIdentifierKey -> NVimInvalidIdentifier +// NVimInvalidList -> NVimInvalidDelimiter +// NVimInvalidSubscript -> NVimInvalidDelimiter +// NVimInvalidSubscriptColon -> NVimInvalidSubscript /// Allocate a new node and set some of the values /// @@ -965,9 +972,10 @@ static const ExprOpLvl node_type_to_op_lvl[] = { [kExprNodeOpMissing] = kEOpLvlMultiplication, [kExprNodeNested] = kEOpLvlParens, - // Note: it is kEOpLvlSubscript for “binary operator” itself, but + // Note: below nodes are kEOpLvlSubscript for “binary operator” itself, but // kEOpLvlParens when it comes to inside the parenthesis. [kExprNodeCall] = kEOpLvlParens, + [kExprNodeSubscript] = kEOpLvlParens, [kExprNodeUnknownFigure] = kEOpLvlParens, [kExprNodeLambda] = kEOpLvlParens, @@ -992,7 +1000,6 @@ static const ExprOpLvl node_type_to_op_lvl[] = { [kExprNodeUnaryPlus] = kEOpLvlUnary, [kExprNodeConcatOrSubscript] = kEOpLvlSubscript, - [kExprNodeSubscript] = kEOpLvlSubscript, [kExprNodeCurlyBracesIdentifier] = kEOpLvlComplexIdentifier, @@ -1010,6 +1017,7 @@ static const ExprOpAssociativity node_type_to_op_ass[] = { [kExprNodeNested] = kEOpAssNo, [kExprNodeCall] = kEOpAssNo, + [kExprNodeSubscript] = kEOpAssNo, [kExprNodeUnknownFigure] = kEOpAssLeft, [kExprNodeLambda] = kEOpAssNo, @@ -1042,7 +1050,6 @@ static const ExprOpAssociativity node_type_to_op_ass[] = { [kExprNodeUnaryPlus] = kEOpAssNo, [kExprNodeConcatOrSubscript] = kEOpAssLeft, - [kExprNodeSubscript] = kEOpAssLeft, [kExprNodeCurlyBracesIdentifier] = kEOpAssLeft, @@ -1105,12 +1112,14 @@ static bool viml_pexpr_handle_bop(const ParserState *const pstate, ExprOpLvl top_node_lvl; ExprOpAssociativity top_node_ass; assert(kv_size(*ast_stack)); - const ExprOpLvl bop_node_lvl = (bop_node->type == kExprNodeCall + const ExprOpLvl bop_node_lvl = ((bop_node->type == kExprNodeCall + || bop_node->type == kExprNodeSubscript) ? 
kEOpLvlSubscript : node_lvl(*bop_node)); #ifndef NDEBUG const ExprOpAssociativity bop_node_ass = ( - bop_node->type == kExprNodeCall + (bop_node->type == kExprNodeCall + || bop_node->type == kExprNodeSubscript) ? kEOpAssLeft : node_ass(*bop_node)); #endif @@ -1214,8 +1223,8 @@ static inline ParserPosition shifted_pos(const ParserPosition pos, /// @param cur_token Token to set position from. #define POS_FROM_TOKEN(cur_node, cur_token) \ do { \ - cur_node->start = cur_token.start; \ - cur_node->len = cur_token.len; \ + (cur_node)->start = cur_token.start; \ + (cur_node)->len = cur_token.len; \ } while (0) /// Allocate new node and set its position from the current token @@ -1226,11 +1235,11 @@ static inline ParserPosition shifted_pos(const ParserPosition pos, /// @param typ Node type. #define NEW_NODE_WITH_CUR_POS(cur_node, typ) \ do { \ - cur_node = NEW_NODE(typ); \ - POS_FROM_TOKEN(cur_node, cur_token); \ + (cur_node) = NEW_NODE(typ); \ + POS_FROM_TOKEN((cur_node), cur_token); \ if (prev_token.type == kExprLexSpacing) { \ - cur_node->start = prev_token.start; \ - cur_node->len += prev_token.len; \ + (cur_node)->start = prev_token.start; \ + (cur_node)->len += prev_token.len; \ } \ } while (0) @@ -1280,9 +1289,8 @@ static inline ParserPosition shifted_pos(const ParserPosition pos, do { \ if (want_node == kENodeValue) { \ ERROR_FROM_TOKEN_AND_MSG(cur_token, (msg)); \ - NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeMissing); \ - cur_node->len = 0; \ - *top_node_p = cur_node; \ + NEW_NODE_WITH_CUR_POS((*top_node_p), kExprNodeMissing); \ + (*top_node_p)->len = 0; \ want_node = kENodeOperator; \ } \ } while (0) @@ -1658,13 +1666,14 @@ viml_pexpr_parse_invalid_comma: HL_CUR_TOKEN(Comma); break; } +#define EXP_VAL_COLON "E15: Expected value, got colon: %.*s" case kExprLexColon: { - ADD_VALUE_IF_MISSING(_("E15: Expected value, got colon: %.*s")); if (kv_size(ast_stack) < 2) { goto viml_pexpr_parse_invalid_colon; } bool is_ternary = false; bool can_be_ternary = true; + bool 
is_subscript = false; for (size_t i = 1; i < kv_size(ast_stack); i++) { ExprASTNode *const *const eastnode_p = (ExprASTNode *const *)kv_Z(ast_stack, i); @@ -1683,6 +1692,7 @@ viml_pexpr_parse_invalid_comma: } is_ternary = true; (*eastnode_p)->data.ter.got_colon = true; + ADD_VALUE_IF_MISSING(_(EXP_VAL_COLON)); assert((*eastnode_p)->children != NULL); assert((*eastnode_p)->children->next == NULL); kvi_push(ast_stack, &(*eastnode_p)->children->next); @@ -1690,14 +1700,18 @@ viml_pexpr_parse_invalid_comma: } else if (eastnode_type == kExprNodeUnknownFigure) { SELECT_FIGURE_BRACE_TYPE(*eastnode_p, DictLiteral, Dict); break; - } else if (eastnode_type == kExprNodeDictLiteral - || eastnode_type == kExprNodeSubscript) { + } else if (eastnode_type == kExprNodeDictLiteral) { + break; + } else if (eastnode_type == kExprNodeSubscript) { + is_subscript = true; + can_be_ternary = false; + assert(!is_ternary); break; } else if (eastnode_type == kExprNodeColon) { goto viml_pexpr_parse_invalid_colon; } else if (eastnode_lvl >= kEOpLvlTernaryValue) { // Do nothing - } else if (eastnode_lvl > kEOpLvlComma) { + } else if (eastnode_lvl >= kEOpLvlComma) { can_be_ternary = false; } else { viml_pexpr_parse_invalid_colon: @@ -1711,16 +1725,122 @@ viml_pexpr_parse_invalid_colon: goto viml_pexpr_parse_invalid_colon; } } - if (is_ternary) { - HL_CUR_TOKEN(TernaryColon); - } else { + if (is_subscript) { + assert(kv_size(ast_stack) > 1); + // Colon immediately following subscript start: it is empty subscript + // part like a[:2]. 
+ if (want_node == kENodeValue + && (*kv_Z(ast_stack, 1))->type == kExprNodeSubscript) { + NEW_NODE_WITH_CUR_POS(*top_node_p, kExprNodeMissing); + (*top_node_p)->len = 0; + want_node = kENodeOperator; + } else { + ADD_VALUE_IF_MISSING(_(EXP_VAL_COLON)); + } NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeColon); ADD_OP_NODE(cur_node); - HL_CUR_TOKEN(Colon); + HL_CUR_TOKEN(SubscriptColon); + } else { + ADD_VALUE_IF_MISSING(_(EXP_VAL_COLON)); + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeColon); + if (is_ternary) { + HL_CUR_TOKEN(TernaryColon); + } else { + ADD_OP_NODE(cur_node); + HL_CUR_TOKEN(Colon); + } } want_node = kENodeValue; break; } +#undef EXP_VAL_COLON + case kExprLexBracket: { + if (cur_token.data.brc.closing) { + ExprASTNode **new_top_node_p = NULL; + // Always drop the topmost value: + // + // 1. When want_node != kENodeValue topmost item on stack is + // a *finished* left operand, which may as well be "{@a}" which + // needs not be finished again. + // 2. Otherwise it is pointing to NULL what nobody wants. + kv_drop(ast_stack, 1); + if (!kv_size(ast_stack)) { + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeListLiteral); + cur_node->len = 0; + if (want_node != kENodeValue) { + cur_node->children = *top_node_p; + } + *top_node_p = cur_node; + goto viml_pexpr_parse_bracket_closing_error; + } + if (want_node == kENodeValue) { + // It is OK to want value if + // + // 1. It is empty list literal, in which case top node will be + // ListLiteral. + // 2. It is list literal with trailing comma, in which case top node + // will be that comma. + // 3. It is subscript with colon, but without one of the values: + // e.g. "a[:]", "a[1:]", top node will be colon in this case. 
+ if ((*kv_last(ast_stack))->type != kExprNodeListLiteral + && (*kv_last(ast_stack))->type != kExprNodeComma + && (*kv_last(ast_stack))->type != kExprNodeColon) { + ERROR_FROM_TOKEN_AND_MSG( + cur_token, + _("E15: Expected value, got closing bracket: %.*s")); + } + } else { + if (!kv_size(ast_stack)) { + new_top_node_p = top_node_p; + goto viml_pexpr_parse_bracket_closing_error; + } + } + do { + new_top_node_p = kv_pop(ast_stack); + } while (kv_size(ast_stack) + && (new_top_node_p == NULL + || ((*new_top_node_p)->type != kExprNodeListLiteral + && (*new_top_node_p)->type != kExprNodeSubscript))); + ExprASTNode *new_top_node = *new_top_node_p; + switch (new_top_node->type) { + case kExprNodeListLiteral: { + HL_CUR_TOKEN(List); + break; + } + case kExprNodeSubscript: { + HL_CUR_TOKEN(Subscript); + break; + } + default: { +viml_pexpr_parse_bracket_closing_error: + assert(!kv_size(ast_stack)); + ERROR_FROM_TOKEN_AND_MSG( + cur_token, _("E15: Unexpected closing figure brace: %.*s")); + HL_CUR_TOKEN(List); + break; + } + } + kvi_push(ast_stack, new_top_node_p); + want_node = kENodeOperator; + } else { + if (want_node == kENodeValue) { + // Value means list literal. 
+ HL_CUR_TOKEN(List); + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeListLiteral); + *top_node_p = cur_node; + kvi_push(ast_stack, &cur_node->children); + want_node = kENodeValue; + } else { + if (prev_token.type == kExprLexSpacing) { + OP_MISSING; + } + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeSubscript); + ADD_OP_NODE(cur_node); + HL_CUR_TOKEN(Subscript); + } + } + break; + } case kExprLexFigureBrace: { if (cur_token.data.brc.closing) { ExprASTNode **new_top_node_p = NULL; -- cgit From af38cea133f5ebb67208cedd289e408cd1dad15a Mon Sep 17 00:00:00 2001 From: ZyX Date: Sun, 8 Oct 2017 21:52:38 +0300 Subject: viml/parser/expressions: Add support for string parsing --- src/nvim/viml/parser/expressions.c | 376 ++++++++++++++++++++++++++++++++++++- src/nvim/viml/parser/expressions.h | 7 + 2 files changed, 375 insertions(+), 8 deletions(-) (limited to 'src/nvim/viml/parser') diff --git a/src/nvim/viml/parser/expressions.c b/src/nvim/viml/parser/expressions.c index 0613cc66d5..3f30fe2a0e 100644 --- a/src/nvim/viml/parser/expressions.c +++ b/src/nvim/viml/parser/expressions.c @@ -915,8 +915,6 @@ static inline void viml_pexpr_debug_print_token( // // NVimIdentifierKey -> Identifier // -// NVimFigureBrace -> NVimInternalError -// // NVimUnaryPlus -> NVimUnaryOperator // NVimBinaryPlus -> NVimBinaryOperator // NVimConcat -> NVimBinaryOperator @@ -929,6 +927,18 @@ static inline void viml_pexpr_debug_print_token( // NVimNestingParenthesis -> NVimParenthesis // NVimCallingParenthesis -> NVimParenthesis // +// NVimString -> String +// NVimStringSpecial -> SpecialChar +// NVimSingleQuote -> NVimString +// NVimSingleQuotedBody -> NVimString +// NVimSingleQuotedQuote -> NVimStringSpecial +// NVimDoubleQuote -> NVimString +// NVimDoubleQuotedBody -> NVimString +// NVimDoubleQuotedEscape -> NVimStringSpecial +// NVimDoubleQuotedUnknownEscape -> NVimInvalid +// +// " Note: NVimDoubleQuotedUnknownEscape is not actually invalid +// // NVimInvalidComma -> NVimInvalidDelimiter // 
NVimInvalidSpacing -> NVimInvalid // NVimInvalidTernary -> NVimInvalidOperator @@ -952,6 +962,19 @@ static inline void viml_pexpr_debug_print_token( // NVimInvalidList -> NVimInvalidDelimiter // NVimInvalidSubscript -> NVimInvalidDelimiter // NVimInvalidSubscriptColon -> NVimInvalidSubscript +// NVimInvalidString -> NVimInvalidValue +// NVimInvalidStringSpecial -> NVimInvalidString +// NVimInvalidSingleQuote -> NVimInvalidString +// NVimInvalidSingleQuotedBody -> NVimInvalidString +// NVimInvalidSingleQuotedQuote -> NVimInvalidStringSpecial +// NVimInvalidDoubleQuote -> NVimInvalidString +// NVimInvalidDoubleQuotedBody -> NVimInvalidString +// NVimInvalidDoubleQuotedEscape -> NVimInvalidStringSpecial +// NVimInvalidDoubleQuotedUnknownEscape -> NVimInvalid +// +// NVimFigureBrace -> NVimInternalError +// NVimInvalidSingleQuotedUnknownEscape -> NVimInternalError +// NVimSingleQuotedUnknownEscape -> NVimInternalError /// Allocate a new node and set some of the values /// @@ -1402,6 +1425,318 @@ static inline void east_set_error(const ParserState *const pstate, } \ } while (0) +/// Structure used to define “string shifts” necessary to map string +/// highlighting to actual strings. +typedef struct { + size_t start; ///< Where special character starts in original string. + size_t orig_len; ///< Length of orininal string (e.g. 4 for "\x80"). + size_t act_len; ///< Length of resulting character(s) (e.g. 1 for "\x80"). + bool escape_not_known; ///< True if escape sequence in original is not known. +} StringShift; + +/// Parse and highlight single- or double-quoted string +/// +/// Function is supposed to detect and highlight regular expressions (but does +/// not do now). +/// +/// @param[out] pstate Parser state which also contains a place where +/// highlighting is saved. +/// @param[out] node Node where string parsing results are saved. +/// @param[in] token Token to highlight. 
+/// @param[in] ast_stack Parser AST stack, used to detect whether current +/// string is a regex. +/// @param[in] is_invalid Whether currently processed token is not valid. +static void parse_quoted_string(ParserState *const pstate, + ExprASTNode *const node, + const LexExprToken token, + const ExprASTStack ast_stack, + const bool is_invalid) + FUNC_ATTR_NONNULL_ALL +{ + const ParserLine pline = pstate->reader.lines.items[token.start.line]; + const char *const s = pline.data + token.start.col; + const char *const e = s + token.len - token.data.str.closed; + const char *p = s + 1; + const bool is_double = (token.type == kExprLexDoubleQuotedString); + size_t size = token.len - token.data.str.closed - 1; + kvec_withinit_t(StringShift, 16) shifts; + kvi_init(shifts); + if (!is_double) { + viml_parser_highlight(pstate, token.start, 1, HL(SingleQuotedString)); + while (p < e) { + const char *const chunk_e = memchr(p, '\'', (size_t)(e - p)); + if (chunk_e == NULL) { + break; + } + size--; + p = chunk_e + 2; + if (pstate->colors) { + kvi_push(shifts, ((StringShift) { + .start = token.start.col + (size_t)(chunk_e - s), + .orig_len = 2, + .act_len = 1, + .escape_not_known = false, + })); + } + } + node->data.str.size = size; + if (size == 0) { + node->data.str.value = NULL; + } else { + char *v_p; + v_p = node->data.str.value = xmalloc(size); + p = s + 1; + while (p < e) { + const char *const chunk_e = memchr(p, '\'', (size_t)(e - p)); + if (chunk_e == NULL) { + memcpy(v_p, p, (size_t)(e - p)); + break; + } + memcpy(v_p, p, (size_t)(chunk_e - p)); + v_p += (size_t)(chunk_e - p) + 1; + v_p[-1] = '\''; + p = chunk_e + 2; + } + } + } else { + viml_parser_highlight(pstate, token.start, 1, HL(DoubleQuotedString)); + for (p = s + 1; p < e; p++) { + if (*p == '\\' && p + 1 < e) { + p++; + if (p + 1 == e) { + size--; + break; + } + switch (*p) { + // A "\" form occupies at least 4 characters, and produces up to + // 6 characters: reserve space for 2 extra, but do not compute actual 
+ // length just now, it would be costy. + case '<': { + size += 2; + break; + } + // Hexadecimal, always single byte, but at least three bytes each. + case 'x': case 'X': { + size--; + if (ascii_isxdigit(p[1])) { + size--; + if (p + 2 < e && ascii_isxdigit(p[2])) { + size--; + } + } + break; + } + // Unicode + // + // \uF takes 1 byte which is 2 bytes less then escape sequence. + // \uFF: 2 bytes, 2 bytes less. + // \uFFF: 3 bytes, 2 bytes less. + // \uFFFF: 3 bytes, 3 bytes less. + // \UFFFFF: 4 bytes, 3 bytes less. + // \UFFFFFF: 5 bytes, 3 bytes less. + // \UFFFFFFF: 6 bytes, 3 bytes less. + // \U7FFFFFFF: 6 bytes, 4 bytes less. + case 'u': case 'U': { + const char *const esc_start = p; + size_t n = (*p == 'u' ? 4 : 8); + int nr = 0; + p++; + while (n-- && ascii_isxdigit(p[1])) { + p++; + nr = (nr << 4) + hex2nr(*p); + } + // Escape length: (esc_start - 1) points to "\\", esc_start to "u" + // or "U", p to the byte after last byte. So escape sequence + // occupies p - (esc_start - 1), but it stands for a utf_char2len + // bytes. + size -= (size_t)((p - (esc_start - 1)) - utf_char2len(nr)); + p--; + break; + } + // Octal, always single byte, but at least two bytes each. 
+ case '0': case '1': case '2': case '3': case '4': case '5': case '6': + case '7': { + size--; + p++; + if (*p >= '0' && *p <= '7') { + size--; + p++; + if (*p >= '0' && *p <= '7') { + size--; + p++; + } + } + break; + } + default: { + size--; + break; + } + } + } + } + if (size == 0) { + node->data.str.value = NULL; + node->data.str.size = 0; + } else { + char *v_p; + v_p = node->data.str.value = xmalloc(size); + p = s + 1; + while (p < e) { + const char *const chunk_e = memchr(p, '\\', (size_t)(e - p)); + if (chunk_e == NULL) { + memcpy(v_p, p, (size_t)(e - p)); + v_p += e - p; + break; + } + memcpy(v_p, p, (size_t)(chunk_e - p)); + v_p += (size_t)(chunk_e - p); + p = chunk_e + 1; + if (p == e) { + *v_p++ = '\\'; + break; + } + bool is_unknown = false; + const char *const v_p_start = v_p; + switch (*p) { +#define SINGLE_CHAR_ESC(ch, real_ch) \ + case ch: { \ + *v_p++ = real_ch; \ + p++; \ + break; \ + } + SINGLE_CHAR_ESC('b', BS) + SINGLE_CHAR_ESC('e', ESC) + SINGLE_CHAR_ESC('f', FF) + SINGLE_CHAR_ESC('n', NL) + SINGLE_CHAR_ESC('r', CAR) + SINGLE_CHAR_ESC('t', TAB) + SINGLE_CHAR_ESC('"', '"') + SINGLE_CHAR_ESC('\\', '\\') +#undef SINGLE_CHAR_ESC + + // Hexadecimal or unicode. + case 'X': case 'x': case 'u': case 'U': { + if (ascii_isxdigit(p[1])) { + size_t n; + int nr; + bool is_hex = (*p == 'x' || *p == 'X'); + + if (is_hex) { + n = 2; + } else if (*p == 'u') { + n = 4; + } else { + n = 8; + } + nr = 0; + while (n-- && ascii_isxdigit(p[1])) { + p++; + nr = (nr << 4) + hex2nr(*p); + } + p++; + if (is_hex) { + *v_p++ = (char)nr; + } else { + v_p += utf_char2bytes(nr, (char_u *)v_p); + } + } else { + is_unknown = true; + *v_p++ = *p; + p++; + } + break; + } + // Octal: "\1", "\12", "\123". 
+ case '0': case '1': case '2': case '3': case '4': case '5': case '6': + case '7': { + uint8_t ch = (uint8_t)(*p++ - '0'); + if (*p >= '0' && *p <= '7') { + ch = (uint8_t)((ch << 3) + *p++ - '0'); + if (*p >= '0' && *p <= '7') { + ch = (uint8_t)((ch << 3) + *p++ - '0'); + } + } + *v_p++ = (char)ch; + break; + } + // Special key, e.g.: "\" + case '<': { + const size_t special_len = ( + trans_special((const char_u **)&p, (size_t)(e - p), + (char_u *)v_p, true, true)); + if (special_len != 0) { + v_p += special_len; + } else { + is_unknown = true; + mb_copy_char((const char_u **)&p, (char_u **)&v_p); + } + break; + } + default: { + is_unknown = true; + mb_copy_char((const char_u **)&p, (char_u **)&v_p); + break; + } + } + if (pstate->colors) { + kvi_push(shifts, ((StringShift) { + .start = token.start.col + (size_t)(chunk_e - s), + .orig_len = (size_t)(p - chunk_e), + .act_len = (size_t)(v_p - (char *)v_p_start), + .escape_not_known = is_unknown, + })); + } + } + node->data.str.size = (size_t)(v_p - node->data.str.value); + } + } + if (pstate->colors) { + // TODO(ZyX-I): use ast_stack to determine and highlight regular expressions + // TODO(ZyX-I): use ast_stack to determine and highlight printf format str + // TODO(ZyX-I): use ast_stack to determine and highlight expression strings + size_t next_col = 1; + const char *const body_str = (is_double + ? HL(DoubleQuotedBody) + : HL(SingleQuotedBody)); + const char *const esc_str = (is_double + ? HL(DoubleQuotedEscape) + : HL(SingleQuotedQuote)); + const char *const ukn_esc_str = (is_double + ? 
HL(DoubleQuotedUnknownEscape) + : HL(SingleQuotedUnknownEscape)); + for (size_t i = 0; i < kv_size(shifts); i++) { + const StringShift cur_shift = kv_A(shifts, i); + if (cur_shift.start > next_col) { + viml_parser_highlight(pstate, shifted_pos(token.start, next_col), + cur_shift.start - next_col, + body_str); + } + viml_parser_highlight(pstate, shifted_pos(token.start, cur_shift.start), + cur_shift.orig_len, + (cur_shift.escape_not_known + ? ukn_esc_str + : esc_str)); + next_col = cur_shift.start + cur_shift.orig_len; + } + if (next_col < token.len - token.data.str.closed) { + viml_parser_highlight(pstate, shifted_pos(token.start, next_col), + token.len - token.data.str.closed - next_col, + body_str); + } + } + if (token.data.str.closed) { + if (is_double) { + viml_parser_highlight(pstate, shifted_pos(token.start, token.len - 1), + 1, HL(DoubleQuotedString)); + } else { + viml_parser_highlight(pstate, shifted_pos(token.start, token.len - 1), + 1, HL(SingleQuotedString)); + } + } + kvi_destroy(shifts); +} + /// Parse one VimL expression /// /// @param pstate Parser state. 
@@ -1714,12 +2049,7 @@ viml_pexpr_parse_invalid_comma: } else if (eastnode_lvl >= kEOpLvlComma) { can_be_ternary = false; } else { -viml_pexpr_parse_invalid_colon: - ERROR_FROM_TOKEN_AND_MSG( - cur_token, - _("E15: Colon outside of dictionary or ternary operator: " - "%.*s")); - break; + goto viml_pexpr_parse_invalid_colon; } if (i == kv_size(ast_stack) - 1) { goto viml_pexpr_parse_invalid_colon; @@ -1741,6 +2071,12 @@ viml_pexpr_parse_invalid_colon: ADD_OP_NODE(cur_node); HL_CUR_TOKEN(SubscriptColon); } else { + goto viml_pexpr_parse_valid_colon; +viml_pexpr_parse_invalid_colon: + ERROR_FROM_TOKEN_AND_MSG( + cur_token, + _("E15: Colon outside of dictionary or ternary operator: %.*s")); +viml_pexpr_parse_valid_colon: ADD_VALUE_IF_MISSING(_(EXP_VAL_COLON)); NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeColon); if (is_ternary) { @@ -2201,6 +2537,30 @@ viml_pexpr_parse_no_paren_closing_error: {} kvi_push(ast_stack, &ter_val_node->children); break; } + case kExprLexDoubleQuotedString: + case kExprLexSingleQuotedString: { + const bool is_double = (tok_type == kExprLexDoubleQuotedString); + if (!cur_token.data.str.closed) { + // It is weird, but Vim has two identical errors messages with + // different error numbers: "E114: Missing quote" and + // "E115: Missing quote". + ERROR_FROM_TOKEN_AND_MSG( + cur_token, (is_double + ? _("E114: Missing double quote: %.*s") + : _("E115: Missing single quote: %.*s"))); + } + if (want_node == kENodeOperator) { + OP_MISSING; + } + NEW_NODE_WITH_CUR_POS( + cur_node, (is_double + ? 
kExprNodeDoubleQuotedString + : kExprNodeSingleQuotedString)); + *top_node_p = cur_node; + parse_quoted_string(pstate, cur_node, cur_token, ast_stack, is_invalid); + want_node = kENodeOperator; + break; + } } viml_pexpr_parse_cycle_end: prev_token = cur_token; diff --git a/src/nvim/viml/parser/expressions.h b/src/nvim/viml/parser/expressions.h index 0d496c87ba..a09cdde4c0 100644 --- a/src/nvim/viml/parser/expressions.h +++ b/src/nvim/viml/parser/expressions.h @@ -195,6 +195,8 @@ typedef enum { kExprNodeConcatOrSubscript = 'S', kExprNodeInteger = '0', ///< Integral number. kExprNodeFloat = '1', ///< Floating-point number. + kExprNodeSingleQuotedString = '\'', + kExprNodeDoubleQuotedString = '"', } ExprASTNodeType; typedef struct expr_ast_node ExprASTNode; @@ -249,6 +251,11 @@ struct expr_ast_node { struct { float_T value; } flt; ///< For kExprNodeFloat. + struct { + char *value; + size_t size; + } str; ///< For kExprNodeSingleQuotedString and + ///< kExprNodeDoubleQuotedString. } data; }; -- cgit From fa3cfc0dd54df125a1dbabccda47a5f45dc483ae Mon Sep 17 00:00:00 2001 From: ZyX Date: Mon, 9 Oct 2017 02:55:56 +0300 Subject: viml/parser/expressions: Finish parser MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Note: formatc.lua was unable to swallow some newer additions to ExprASTNodeType (specifically `kExprNodeOr = '|'` and probably something else), so all `= …` were dropped: in any case they only were there in order to not bother updating viml_pexpr_debug_print_ast_node and since it is now known all nodes which will be present it is not much of an issue. 
--- src/nvim/viml/parser/expressions.c | 369 +++++++++++++++++++++++++------------ src/nvim/viml/parser/expressions.h | 115 ++++++++---- src/nvim/viml/parser/parser.h | 2 +- 3 files changed, 331 insertions(+), 155 deletions(-) (limited to 'src/nvim/viml/parser') diff --git a/src/nvim/viml/parser/expressions.c b/src/nvim/viml/parser/expressions.c index 3f30fe2a0e..75fcb17bf6 100644 --- a/src/nvim/viml/parser/expressions.c +++ b/src/nvim/viml/parser/expressions.c @@ -361,11 +361,12 @@ LexExprToken viml_pexpr_next_token(ParserState *const pstate, const int flags) // Scope: `s:`, etc. } else if (ret.len == 1 && pline.size > 1 - && strchr("sgvbwtla", schar) != NULL + && memchr(EXPR_VAR_SCOPE_LIST, schar, + sizeof(EXPR_VAR_SCOPE_LIST)) != NULL && pline.data[ret.len] == ':' && !(flags & kELFlagForbidScope)) { ret.len++; - ret.data.var.scope = schar; + ret.data.var.scope = (ExprVarScope)schar; CHARREG(kExprLexPlainIdentifier, ISWORD_OR_AUTOLOAD); ret.data.var.autoload = ( memchr(pline.data + 2, AUTOLOAD_CHAR, ret.len - 2) @@ -408,14 +409,13 @@ LexExprToken viml_pexpr_next_token(ParserState *const pstate, const int flags) ret.type = kExprLexOption; if (pline.size > 2 && pline.data[2] == ':' - && strchr("gl", pline.data[1]) != NULL) { + && memchr(EXPR_OPT_SCOPE_LIST, pline.data[1], + sizeof(EXPR_OPT_SCOPE_LIST)) != NULL) { ret.len += 2; - ret.data.opt.scope = (pline.data[1] == 'g' - ? 
kExprLexOptGlobal - : kExprLexOptLocal); + ret.data.opt.scope = (ExprOptScope)pline.data[1]; ret.data.opt.name = pline.data + 3; } else { - ret.data.opt.scope = kExprLexOptUnspecified; + ret.data.opt.scope = kExprOptScopeUnspecified; ret.data.opt.name = pline.data + 1; } const char *p = ret.data.opt.name; @@ -637,9 +637,9 @@ static const char *const eltkn_mul_type_tab[] = { }; static const char *const eltkn_opt_scope_tab[] = { - [kExprLexOptUnspecified] = "Unspecified", - [kExprLexOptGlobal] = "Global", - [kExprLexOptLocal] = "Local", + [kExprOptScopeUnspecified] = "Unspecified", + [kExprOptScopeGlobal] = "Global", + [kExprOptScopeLocal] = "Local", }; /// Represent `int` character as a string @@ -990,67 +990,25 @@ static inline ExprASTNode *viml_pexpr_new_node(const ExprASTNodeType type) return ret; } -static const ExprOpLvl node_type_to_op_lvl[] = { - [kExprNodeMissing] = kEOpLvlInvalid, - [kExprNodeOpMissing] = kEOpLvlMultiplication, +static struct { + ExprOpLvl lvl; + ExprOpAssociativity ass; +} node_type_to_node_props[] = { + [kExprNodeMissing] = { kEOpLvlInvalid, kEOpAssNo, }, + [kExprNodeOpMissing] = { kEOpLvlMultiplication, kEOpAssNo }, - [kExprNodeNested] = kEOpLvlParens, + [kExprNodeNested] = { kEOpLvlParens, kEOpAssNo }, // Note: below nodes are kEOpLvlSubscript for “binary operator” itself, but // kEOpLvlParens when it comes to inside the parenthesis. 
- [kExprNodeCall] = kEOpLvlParens, - [kExprNodeSubscript] = kEOpLvlParens, + [kExprNodeCall] = { kEOpLvlParens, kEOpAssNo }, + [kExprNodeSubscript] = { kEOpLvlParens, kEOpAssNo }, - [kExprNodeUnknownFigure] = kEOpLvlParens, - [kExprNodeLambda] = kEOpLvlParens, - [kExprNodeDictLiteral] = kEOpLvlParens, - [kExprNodeListLiteral] = kEOpLvlParens, + [kExprNodeUnknownFigure] = { kEOpLvlParens, kEOpAssLeft }, + [kExprNodeLambda] = { kEOpLvlParens, kEOpAssNo }, + [kExprNodeDictLiteral] = { kEOpLvlParens, kEOpAssNo }, + [kExprNodeListLiteral] = { kEOpLvlParens, kEOpAssNo }, - [kExprNodeArrow] = kEOpLvlArrow, - - [kExprNodeComma] = kEOpLvlComma, - - [kExprNodeColon] = kEOpLvlColon, - - [kExprNodeTernary] = kEOpLvlTernary, - - [kExprNodeTernaryValue] = kEOpLvlTernaryValue, - - [kExprNodeComparison] = kEOpLvlComparison, - - [kExprNodeBinaryPlus] = kEOpLvlAddition, - [kExprNodeConcat] = kEOpLvlAddition, - - [kExprNodeUnaryPlus] = kEOpLvlUnary, - - [kExprNodeConcatOrSubscript] = kEOpLvlSubscript, - - [kExprNodeCurlyBracesIdentifier] = kEOpLvlComplexIdentifier, - - [kExprNodeComplexIdentifier] = kEOpLvlValue, - [kExprNodePlainIdentifier] = kEOpLvlValue, - [kExprNodePlainKey] = kEOpLvlValue, - [kExprNodeRegister] = kEOpLvlValue, - [kExprNodeInteger] = kEOpLvlValue, - [kExprNodeFloat] = kEOpLvlValue, -}; - -static const ExprOpAssociativity node_type_to_op_ass[] = { - [kExprNodeMissing] = kEOpAssNo, - [kExprNodeOpMissing] = kEOpAssNo, - - [kExprNodeNested] = kEOpAssNo, - [kExprNodeCall] = kEOpAssNo, - [kExprNodeSubscript] = kEOpAssNo, - - [kExprNodeUnknownFigure] = kEOpAssLeft, - [kExprNodeLambda] = kEOpAssNo, - [kExprNodeDictLiteral] = kEOpAssNo, - [kExprNodeListLiteral] = kEOpAssNo, - - // Does not really matter. 
- [kExprNodeArrow] = kEOpAssNo, - - [kExprNodeColon] = kEOpAssNo, + [kExprNodeArrow] = { kEOpLvlArrow, kEOpAssNo }, // Right associativity for comma because this means easier access to arguments // list, etc: for "[a, b, c, d]" you can access "a" in one step if it is @@ -1059,29 +1017,48 @@ static const ExprOpAssociativity node_type_to_op_ass[] = { // traverse all three comma() structures. And with comma operator (including // actual comma operator from C which is not present in VimL) nobody cares // about associativity, only about order of execution. - [kExprNodeComma] = kEOpAssRight, + [kExprNodeComma] = { kEOpLvlComma, kEOpAssRight }, + + // Colons are not eligible for chaining, so nobody cares about associativity. + [kExprNodeColon] = { kEOpLvlColon, kEOpAssNo }, + + [kExprNodeTernary] = { kEOpLvlTernary, kEOpAssRight }, - [kExprNodeTernary] = kEOpAssRight, + [kExprNodeOr] = { kEOpLvlOr, kEOpAssLeft }, - [kExprNodeTernaryValue] = kEOpAssRight, + [kExprNodeAnd] = { kEOpLvlAnd, kEOpAssLeft }, - [kExprNodeComparison] = kEOpAssRight, + [kExprNodeTernaryValue] = { kEOpLvlTernaryValue, kEOpAssRight }, - [kExprNodeBinaryPlus] = kEOpAssLeft, - [kExprNodeConcat] = kEOpAssLeft, + [kExprNodeComparison] = { kEOpLvlComparison, kEOpAssRight }, - [kExprNodeUnaryPlus] = kEOpAssNo, + [kExprNodeBinaryPlus] = { kEOpLvlAddition, kEOpAssLeft }, + [kExprNodeBinaryMinus] = { kEOpLvlAddition, kEOpAssLeft }, + [kExprNodeConcat] = { kEOpLvlAddition, kEOpAssLeft }, - [kExprNodeConcatOrSubscript] = kEOpAssLeft, + [kExprNodeMultiplication] = { kEOpLvlMultiplication, kEOpAssLeft }, + [kExprNodeDivision] = { kEOpLvlMultiplication, kEOpAssLeft }, + [kExprNodeMod] = { kEOpLvlMultiplication, kEOpAssLeft }, - [kExprNodeCurlyBracesIdentifier] = kEOpAssLeft, + [kExprNodeUnaryPlus] = { kEOpLvlUnary, kEOpAssNo }, + [kExprNodeUnaryMinus] = { kEOpLvlUnary, kEOpAssNo }, + [kExprNodeNot] = { kEOpLvlUnary, kEOpAssNo }, - [kExprNodeComplexIdentifier] = kEOpAssLeft, - [kExprNodePlainIdentifier] = 
kEOpAssNo, - [kExprNodePlainKey] = kEOpAssNo, - [kExprNodeRegister] = kEOpAssNo, - [kExprNodeInteger] = kEOpAssNo, - [kExprNodeFloat] = kEOpAssNo, + [kExprNodeConcatOrSubscript] = { kEOpLvlSubscript, kEOpAssLeft }, + + [kExprNodeCurlyBracesIdentifier] = { kEOpLvlComplexIdentifier, kEOpAssLeft }, + + [kExprNodeComplexIdentifier] = { kEOpLvlValue, kEOpAssLeft }, + + [kExprNodePlainIdentifier] = { kEOpLvlValue, kEOpAssNo }, + [kExprNodePlainKey] = { kEOpLvlValue, kEOpAssNo }, + [kExprNodeRegister] = { kEOpLvlValue, kEOpAssNo }, + [kExprNodeInteger] = { kEOpLvlValue, kEOpAssNo }, + [kExprNodeFloat] = { kEOpLvlValue, kEOpAssNo }, + [kExprNodeDoubleQuotedString] = { kEOpLvlValue, kEOpAssNo }, + [kExprNodeSingleQuotedString] = { kEOpLvlValue, kEOpAssNo }, + [kExprNodeOption] = { kEOpLvlValue, kEOpAssNo }, + [kExprNodeEnvironment] = { kEOpLvlValue, kEOpAssNo }, }; /// Get AST node priority level @@ -1094,7 +1071,7 @@ static const ExprOpAssociativity node_type_to_op_ass[] = { static inline ExprOpLvl node_lvl(const ExprASTNode node) FUNC_ATTR_ALWAYS_INLINE FUNC_ATTR_CONST FUNC_ATTR_WARN_UNUSED_RESULT { - return node_type_to_op_lvl[node.type]; + return node_type_to_node_props[node.type].lvl; } /// Get AST node associativity, to be used for operator nodes primary @@ -1107,7 +1084,7 @@ static inline ExprOpLvl node_lvl(const ExprASTNode node) static inline ExprOpAssociativity node_ass(const ExprASTNode node) FUNC_ATTR_ALWAYS_INLINE FUNC_ATTR_CONST FUNC_ATTR_WARN_UNUSED_RESULT { - return node_type_to_op_ass[node.type]; + return node_type_to_node_props[node.type].ass; } /// Handle binary operator @@ -1837,7 +1814,7 @@ viml_pexpr_parse_process_token: is_concat_or_subscript && (cur_token.type == kExprLexPlainIdentifier ? 
(!cur_token.data.var.autoload - && cur_token.data.var.scope == 0) + && cur_token.data.var.scope == kExprVarScopeMissing) : (cur_token.type == kExprLexNumber)) && prev_token.type != kExprLexSpacing); if (is_concat_or_subscript && !node_is_key) { @@ -1856,7 +1833,7 @@ viml_pexpr_parse_process_token: && tok_type != kExprLexArrow) || (want_node == kENodeArgument && !(cur_token.type == kExprLexPlainIdentifier - && cur_token.data.var.scope == 0 + && cur_token.data.var.scope == kExprVarScopeMissing && !cur_token.data.var.autoload) && tok_type != kExprLexArrow)) { lambda_node->data.fig.type_guesses.allow_lambda = false; @@ -1885,6 +1862,8 @@ viml_pexpr_parse_process_token: || want_node == kENodeArgumentSeparator || want_node == kENodeArgument); switch (tok_type) { + case kExprLexMissing: + case kExprLexSpacing: case kExprLexEOC: { assert(false); } @@ -1894,31 +1873,111 @@ viml_pexpr_parse_process_token: goto viml_pexpr_parse_process_token; } case kExprLexRegister: { - if (want_node == kENodeValue) { - NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeRegister); - cur_node->data.reg.name = cur_token.data.reg.name; - *top_node_p = cur_node; - want_node = kENodeOperator; - HL_CUR_TOKEN(Register); - } else { + if (want_node == kENodeOperator) { // Register in operator position: e.g. @a @a OP_MISSING; } + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeRegister); + cur_node->data.reg.name = cur_token.data.reg.name; + *top_node_p = cur_node; + want_node = kENodeOperator; + HL_CUR_TOKEN(Register); break; } - case kExprLexPlus: { - if (want_node == kENodeValue) { - // Value level: assume unary plus - NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeUnaryPlus); - *top_node_p = cur_node; - kvi_push(ast_stack, &cur_node->children); - HL_CUR_TOKEN(UnaryPlus); - } else { - NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeBinaryPlus); - ADD_OP_NODE(cur_node); - HL_CUR_TOKEN(BinaryPlus); +#define SIMPLE_UB_OP(op) \ + case kExprLex##op: { \ + if (want_node == kENodeValue) { \ + /* Value level: assume unary operator. 
*/ \ + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeUnary##op); \ + *top_node_p = cur_node; \ + kvi_push(ast_stack, &cur_node->children); \ + HL_CUR_TOKEN(Unary##op); \ + } else { \ + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeBinary##op); \ + ADD_OP_NODE(cur_node); \ + HL_CUR_TOKEN(Binary##op); \ + } \ + want_node = kENodeValue; \ + break; \ + } + SIMPLE_UB_OP(Plus) + SIMPLE_UB_OP(Minus) +#undef SIMPLE_UB_OP +#define SIMPLE_B_OP(op, msg) \ + case kExprLex##op: { \ + ADD_VALUE_IF_MISSING(_("E15: Unexpected " msg ": %.*s")); \ + NEW_NODE_WITH_CUR_POS(cur_node, kExprNode##op); \ + HL_CUR_TOKEN(op); \ + ADD_OP_NODE(cur_node); \ + break; \ + } + SIMPLE_B_OP(Or, "or operator") + SIMPLE_B_OP(And, "and operator") +#undef SIMPLE_B_OP + case kExprLexMultiplication: { + ADD_VALUE_IF_MISSING( + _("E15: Unexpected multiplication-like operator: %.*s")); + switch (cur_token.data.mul.type) { +#define MUL_OP(lex_op_tail, node_op_tail) \ + case kExprLexMul##lex_op_tail: { \ + NEW_NODE_WITH_CUR_POS(cur_node, kExprNode##node_op_tail); \ + HL_CUR_TOKEN(node_op_tail); \ + break; \ + } + MUL_OP(Mul, Multiplication) + MUL_OP(Div, Division) + MUL_OP(Mod, Mod) +#undef MUL_OP } - want_node = kENodeValue; + ADD_OP_NODE(cur_node); + break; + } + case kExprLexOption: { + if (want_node == kENodeOperator) { + OP_MISSING; + } + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeOption); + cur_node->data.opt.ident = cur_token.data.opt.name; + cur_node->data.opt.ident_len = cur_token.data.opt.len; + cur_node->data.opt.scope = cur_token.data.opt.scope; + *top_node_p = cur_node; + want_node = kENodeOperator; + viml_parser_highlight(pstate, cur_token.start, 1, HL(OptionSigil)); + const size_t scope_shift = ( + cur_token.data.opt.scope == kExprOptScopeUnspecified ? 
0 : 2); + if (scope_shift) { + viml_parser_highlight(pstate, shifted_pos(cur_token.start, 1), 1, + HL(OptionScope)); + viml_parser_highlight(pstate, shifted_pos(cur_token.start, 2), 1, + HL(OptionScopeDelimiter)); + } + viml_parser_highlight( + pstate, shifted_pos(cur_token.start, scope_shift + 1), + cur_token.len - scope_shift + 1, HL(Option)); + break; + } + case kExprLexEnv: { + if (want_node == kENodeOperator) { + OP_MISSING; + } + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeEnvironment); + cur_node->data.env.ident = pline.data + cur_token.start.col + 1; + cur_node->data.env.ident_len = cur_token.len - 1; + *top_node_p = cur_node; + want_node = kENodeOperator; + viml_parser_highlight(pstate, cur_token.start, 1, HL(EnvironmentSigil)); + viml_parser_highlight(pstate, shifted_pos(cur_token.start, 1), + cur_token.len - 1, HL(Environment)); + break; + } + case kExprLexNot: { + if (want_node == kENodeOperator) { + OP_MISSING; + } + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeNot); + *top_node_p = cur_node; + kvi_push(ast_stack, &cur_node->children); + HL_CUR_TOKEN(Not); break; } case kExprLexComparison: { @@ -2359,9 +2418,8 @@ viml_pexpr_parse_figure_brace_closing_error: ? kExprNodePlainKey : kExprNodePlainIdentifier)); cur_node->data.var.scope = cur_token.data.var.scope; - const size_t scope_shift = (cur_token.data.var.scope == 0 - ? 0 - : 2); + const size_t scope_shift = ( + cur_token.data.var.scope == kExprVarScopeMissing ? 0 : 2); cur_node->data.var.ident = (pline.data + cur_token.start.col + scope_shift); cur_node->data.var.ident_len = cur_token.len - scope_shift; @@ -2373,16 +2431,14 @@ viml_pexpr_parse_figure_brace_closing_error: viml_parser_highlight(pstate, shifted_pos(cur_token.start, 1), 1, HL(IdentifierScopeDelimiter)); } - if (scope_shift < cur_token.len) { - viml_parser_highlight(pstate, shifted_pos(cur_token.start, - scope_shift), - cur_token.len - scope_shift, - (node_is_key - ? 
HL(IdentifierKey) - : HL(Identifier))); - } + viml_parser_highlight(pstate, shifted_pos(cur_token.start, + scope_shift), + cur_token.len - scope_shift, + (node_is_key + ? HL(IdentifierKey) + : HL(Identifier))); } else { - if (cur_token.data.var.scope == 0) { + if (cur_token.data.var.scope == kExprVarScopeMissing) { ADD_IDENT( do { NEW_NODE_WITH_CUR_POS(cur_node, kExprNodePlainIdentifier); @@ -2606,9 +2662,85 @@ viml_pexpr_parse_end: cur_node->start); break; } - case kExprNodeBinaryPlus: + case kExprNodeListLiteral: { + // For whatever reason "[1" yields "E696: Missing comma in list" error + // in Vim while "[1," yields E697. + east_set_error( + pstate, &ast.err, + _("E697: Missing end of List ']': %.*s"), + cur_node->start); + break; + } + case kExprNodeDictLiteral: { + // Same problem like with list literal with E722 (missing comma) vs + // E723, but additionally just "{" yields only E15. + east_set_error( + pstate, &ast.err, + _("E723: Missing end of Dictionary '}': %.*s"), + cur_node->start); + break; + } + case kExprNodeUnknownFigure: { + east_set_error( + pstate, &ast.err, + _("E15: Missing closing figure brace: %.*s"), + cur_node->start); + break; + } + case kExprNodeLambda: { + east_set_error( + pstate, &ast.err, + _("E15: Missing closing figure brace for lambda: %.*s"), + cur_node->start); + break; + } + case kExprNodeCurlyBracesIdentifier: { + // Until trailing "}" it is impossible to distinguish curly braces + // identifier and dictionary, so it must not appear in the stack like + // this. + assert(false); + } + case kExprNodeInteger: + case kExprNodeFloat: + case kExprNodeSingleQuotedString: + case kExprNodeDoubleQuotedString: + case kExprNodeOption: + case kExprNodeEnvironment: + case kExprNodeRegister: + case kExprNodePlainIdentifier: + case kExprNodePlainKey: { + // These are plain values and not containers, for them it should only + // be possible to show up in the topmost stack element, but it was + // unconditionally popped at the start. 
+ assert(false); + } + case kExprNodeComma: + case kExprNodeColon: + case kExprNodeArrow: { + // It is actually only valid inside something else, but everything + // where one of the above is valid requires to be closed and thus is + // to be caught later. + break; + } + case kExprNodeConcatOrSubscript: + case kExprNodeComplexIdentifier: + case kExprNodeSubscript: { + // FIXME: Investigate whether above are OK to be present in the stack. + break; + } + case kExprNodeMod: + case kExprNodeDivision: + case kExprNodeMultiplication: + case kExprNodeNot: + case kExprNodeAnd: + case kExprNodeOr: + case kExprNodeConcat: + case kExprNodeComparison: + case kExprNodeUnaryMinus: case kExprNodeUnaryPlus: - case kExprNodeRegister: { + case kExprNodeBinaryMinus: + case kExprNodeTernary: + case kExprNodeBinaryPlus: { // It is OK to see these in the stack. break; } @@ -2621,7 +2753,6 @@ viml_pexpr_parse_end: } break; } - // TODO(ZyX-I): handle other values } } } diff --git a/src/nvim/viml/parser/expressions.h b/src/nvim/viml/parser/expressions.h index a09cdde4c0..0198852bed 100644 --- a/src/nvim/viml/parser/expressions.h +++ b/src/nvim/viml/parser/expressions.h @@ -61,6 +61,36 @@ typedef enum { kExprCmpIdentical, ///< `is` or `isnot` } ExprComparisonType; +/// All possible option scopes +typedef enum { + kExprOptScopeUnspecified = 0, + kExprOptScopeGlobal = 'g', + kExprOptScopeLocal = 'l', +} ExprOptScope; + +#define EXPR_OPT_SCOPE_LIST \ + ((char *)(char[]){ kExprOptScopeGlobal, kExprOptScopeLocal }) + +/// All possible variable scopes +typedef enum { + kExprVarScopeMissing = 0, + kExprVarScopeScript = 's', + kExprVarScopeGlobal = 'g', + kExprVarScopeVim = 'v', + kExprVarScopeBuffer = 'b', + kExprVarScopeWindow = 'w', + kExprVarScopeTabpage = 't', + kExprVarScopeLocal = 'l', + kExprVarScopeArguments = 'a', +} ExprVarScope; + +#define EXPR_VAR_SCOPE_LIST \ + ((char[]) { \ + kExprVarScopeScript, kExprVarScopeGlobal, kExprVarScopeVim, \ + kExprVarScopeBuffer, kExprVarScopeWindow, 
kExprVarScopeTabpage, \ + kExprVarScopeLocal, kExprVarScopeBuffer, kExprVarScopeArguments, \ + }) + /// Lexer token typedef struct { ParserPosition start; @@ -96,15 +126,11 @@ typedef struct { struct { const char *name; ///< Option name start. size_t len; ///< Option name length. - enum { - kExprLexOptUnspecified = 0, - kExprLexOptGlobal = 1, - kExprLexOptLocal = 2, - } scope; ///< Option scope: &l:, &g: or not specified. + ExprOptScope scope; ///< Option scope: &l:, &g: or not specified. } opt; ///< Option properties. struct { - int scope; ///< Scope character or 0 if not present. + ExprVarScope scope; ///< Scope character or 0 if not present. bool autoload; ///< Has autoload characters. } var; ///< For kExprLexPlainIdentifier @@ -150,53 +176,63 @@ typedef enum { /// Expression AST node type typedef enum { - kExprNodeMissing = 'X', - kExprNodeOpMissing = '_', - kExprNodeTernary = '?', ///< Ternary operator. - kExprNodeTernaryValue = 'C', ///< Ternary operator, colon. - kExprNodeRegister = '@', ///< Register. - kExprNodeSubscript = 's', ///< Subscript. - kExprNodeListLiteral = 'l', ///< List literal. - kExprNodeUnaryPlus = 'p', - kExprNodeBinaryPlus = '+', - kExprNodeNested = 'e', ///< Nested parenthesised expression. - kExprNodeCall = 'c', ///< Function call. + kExprNodeMissing = 0, + kExprNodeOpMissing, + kExprNodeTernary, ///< Ternary operator. + kExprNodeTernaryValue, ///< Ternary operator, colon. + kExprNodeRegister, ///< Register. + kExprNodeSubscript, ///< Subscript. + kExprNodeListLiteral, ///< List literal. + kExprNodeUnaryPlus, + kExprNodeBinaryPlus, + kExprNodeNested, ///< Nested parenthesised expression. + kExprNodeCall, ///< Function call. /// Plain identifier: simple variable/function name /// /// Looks like "string", "g:Foo", etc: consists from a single /// kExprLexPlainIdentifier token. 
- kExprNodePlainIdentifier = 'i', + kExprNodePlainIdentifier, /// Plain dictionary key, for use with kExprNodeConcatOrSubscript - kExprNodePlainKey = 'k', + kExprNodePlainKey, /// Complex identifier: variable/function name with curly braces - kExprNodeComplexIdentifier = 'I', + kExprNodeComplexIdentifier, /// Figure brace expression which is not yet known /// /// May resolve to any of kExprNodeDictLiteral, kExprNodeLambda or /// kExprNodeCurlyBracesIdentifier. - kExprNodeUnknownFigure = '{', - kExprNodeLambda = '\\', ///< Lambda. - kExprNodeDictLiteral = 'd', ///< Dictionary literal. - kExprNodeCurlyBracesIdentifier= '}', ///< Part of the curly braces name. - kExprNodeComma = ',', ///< Comma “operator”. - kExprNodeColon = ':', ///< Colon “operator”. - kExprNodeArrow = '>', ///< Arrow “operator”. - kExprNodeComparison = '=', ///< Various comparison operators. + kExprNodeUnknownFigure, + kExprNodeLambda, ///< Lambda. + kExprNodeDictLiteral, ///< Dictionary literal. + kExprNodeCurlyBracesIdentifier, ///< Part of the curly braces name. + kExprNodeComma, ///< Comma “operator”. + kExprNodeColon, ///< Colon “operator”. + kExprNodeArrow, ///< Arrow “operator”. + kExprNodeComparison, ///< Various comparison operators. /// Concat operator /// /// To be only used in cases when it is known for sure it is not a subscript. - kExprNodeConcat = '.', + kExprNodeConcat, /// Concat or subscript operator /// /// For cases when it is not obvious whether expression is a concat or /// a subscript. May only have either number or plain identifier as the second /// child. To make it easier to avoid curly braces in place of /// kExprNodePlainIdentifier node kExprNodePlainKey is used. - kExprNodeConcatOrSubscript = 'S', - kExprNodeInteger = '0', ///< Integral number. - kExprNodeFloat = '1', ///< Floating-point number. - kExprNodeSingleQuotedString = '\'', - kExprNodeDoubleQuotedString = '"', + kExprNodeConcatOrSubscript, + kExprNodeInteger, ///< Integral number. 
+ kExprNodeFloat, ///< Floating-point number. + kExprNodeSingleQuotedString, + kExprNodeDoubleQuotedString, + kExprNodeOr, + kExprNodeAnd, + kExprNodeUnaryMinus, + kExprNodeBinaryMinus, + kExprNodeNot, + kExprNodeMultiplication, + kExprNodeDivision, + kExprNodeMod, + kExprNodeOption, + kExprNodeEnvironment, } ExprASTNodeType; typedef struct expr_ast_node ExprASTNode; @@ -230,7 +266,7 @@ struct expr_ast_node { size_t opening_hl_idx; } fig; ///< For kExprNodeUnknownFigure. struct { - int scope; ///< Scope character or 0 if not present. + ExprVarScope scope; ///< Scope character or 0 if not present. /// Actual identifier without scope. /// /// Points to inside parser reader state. @@ -256,6 +292,15 @@ struct expr_ast_node { size_t size; } str; ///< For kExprNodeSingleQuotedString and ///< kExprNodeDoubleQuotedString. + struct { + const char *ident; ///< Option name start. + size_t ident_len; ///< Option name length. + ExprOptScope scope; ///< Option scope: &l:, &g: or not specified. + } opt; ///< For kExprNodeOption. + struct { + const char *ident; ///< Environment variable name start. + size_t ident_len; ///< Environment variable name length. + } env; ///< For kExprNodeEnvironment. } data; }; diff --git a/src/nvim/viml/parser/parser.h b/src/nvim/viml/parser/parser.h index a17edac403..10ced57977 100644 --- a/src/nvim/viml/parser/parser.h +++ b/src/nvim/viml/parser/parser.h @@ -173,7 +173,7 @@ static inline void viml_parser_highlight(ParserState *const pstate, const size_t len, const char *const group) { - if (pstate->colors == NULL) { + if (pstate->colors == NULL || len == 0) { return; } // TODO(ZyX-I): May do some assert() sanitizing here. 
-- cgit From c286155bfa53c828ebe5479fd81a544740a92403 Mon Sep 17 00:00:00 2001 From: ZyX Date: Sun, 15 Oct 2017 19:06:41 +0300 Subject: viml/parser/expressions: Create tests for latest additions --- src/nvim/viml/parser/expressions.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) (limited to 'src/nvim/viml/parser') diff --git a/src/nvim/viml/parser/expressions.c b/src/nvim/viml/parser/expressions.c index 75fcb17bf6..8928179349 100644 --- a/src/nvim/viml/parser/expressions.c +++ b/src/nvim/viml/parser/expressions.c @@ -1937,9 +1937,22 @@ viml_pexpr_parse_process_token: OP_MISSING; } NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeOption); - cur_node->data.opt.ident = cur_token.data.opt.name; - cur_node->data.opt.ident_len = cur_token.data.opt.len; - cur_node->data.opt.scope = cur_token.data.opt.scope; + if (cur_token.type == kExprLexInvalid) { + assert(cur_token.len == 1 + || (cur_token.len == 3 + && pline.data[cur_token.start.col + 2] == ':')); + cur_node->data.opt.ident = ( + pline.data + cur_token.start.col + cur_token.len); + cur_node->data.opt.ident_len = 0; + cur_node->data.opt.scope = ( + cur_token.len == 3 + ? 
(ExprOptScope)pline.data[cur_token.start.col + 1] + : kExprOptScopeUnspecified); + } else { + cur_node->data.opt.ident = cur_token.data.opt.name; + cur_node->data.opt.ident_len = cur_token.data.opt.len; + cur_node->data.opt.scope = cur_token.data.opt.scope; + } *top_node_p = cur_node; want_node = kENodeOperator; viml_parser_highlight(pstate, cur_token.start, 1, HL(OptionSigil)); @@ -1953,7 +1966,7 @@ viml_pexpr_parse_process_token: } viml_parser_highlight( pstate, shifted_pos(cur_token.start, scope_shift + 1), - cur_token.len - scope_shift + 1, HL(Option)); + cur_token.len - (scope_shift + 1), HL(Option)); break; } case kExprLexEnv: { @@ -1963,6 +1976,10 @@ viml_pexpr_parse_process_token: NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeEnvironment); cur_node->data.env.ident = pline.data + cur_token.start.col + 1; cur_node->data.env.ident_len = cur_token.len - 1; + if (cur_node->data.env.ident_len == 0) { + ERROR_FROM_TOKEN_AND_MSG(cur_token, + _("E15: Environment variable name missing")); + } *top_node_p = cur_node; want_node = kENodeOperator; viml_parser_highlight(pstate, cur_token.start, 1, HL(EnvironmentSigil)); -- cgit From 6c19cbef2611c389da6f3e06d8c8eb635d065774 Mon Sep 17 00:00:00 2001 From: ZyX Date: Sun, 15 Oct 2017 20:05:35 +0300 Subject: viml/parser/expressions,tests: Add AST freeing, with sanity checks --- src/nvim/viml/parser/expressions.c | 239 +++++++++++++++++++++++++++++++------ 1 file changed, 205 insertions(+), 34 deletions(-) (limited to 'src/nvim/viml/parser') diff --git a/src/nvim/viml/parser/expressions.c b/src/nvim/viml/parser/expressions.c index 8928179349..2f7ec6bcca 100644 --- a/src/nvim/viml/parser/expressions.c +++ b/src/nvim/viml/parser/expressions.c @@ -642,37 +642,6 @@ static const char *const eltkn_opt_scope_tab[] = { [kExprOptScopeLocal] = "Local", }; -/// Represent `int` character as a string -/// -/// Converts -/// - ASCII digits into '{digit}' -/// - ASCII printable characters into a single-character strings -/// - everything else to 
numbers. -/// -/// @param[in] ch Character to convert. -/// -/// @return Converted string, stored in a static buffer (overriden after each -/// call). -static const char *intchar2str(const int ch) - FUNC_ATTR_WARN_UNUSED_RESULT -{ - static char buf[sizeof(int) * 3 + 1]; - if (' ' <= ch && ch < 0x7f) { - if (ascii_isdigit(ch)) { - buf[0] = '\''; - buf[1] = (char)ch; - buf[2] = '\''; - buf[3] = NUL; - } else { - buf[0] = (char)ch; - buf[1] = NUL; - } - } else { - snprintf(buf, sizeof(buf), "%i", ch); - } - return buf; -} - /// Represent token as a string /// /// Intended for testing and debugging purposes. @@ -756,6 +725,78 @@ viml_pexpr_repr_token_end: return ret; } +static const char *const east_node_type_tab[] = { + [kExprNodeMissing] = "Missing", + [kExprNodeOpMissing] = "OpMissing", + [kExprNodeTernary] = "Ternary", + [kExprNodeTernaryValue] = "TernaryValue", + [kExprNodeRegister] = "Register", + [kExprNodeSubscript] = "Subscript", + [kExprNodeListLiteral] = "ListLiteral", + [kExprNodeUnaryPlus] = "UnaryPlus", + [kExprNodeBinaryPlus] = "BinaryPlus", + [kExprNodeNested] = "Nested", + [kExprNodeCall] = "Call", + [kExprNodePlainIdentifier] = "PlainIdentifier", + [kExprNodePlainKey] = "PlainKey", + [kExprNodeComplexIdentifier] = "ComplexIdentifier", + [kExprNodeUnknownFigure] = "UnknownFigure", + [kExprNodeLambda] = "Lambda", + [kExprNodeDictLiteral] = "DictLiteral", + [kExprNodeCurlyBracesIdentifier] = "CurlyBracesIdentifier", + [kExprNodeComma] = "Comma", + [kExprNodeColon] = "Colon", + [kExprNodeArrow] = "Arrow", + [kExprNodeComparison] = "Comparison", + [kExprNodeConcat] = "Concat", + [kExprNodeConcatOrSubscript] = "ConcatOrSubscript", + [kExprNodeInteger] = "Integer", + [kExprNodeFloat] = "Float", + [kExprNodeSingleQuotedString] = "SingleQuotedString", + [kExprNodeDoubleQuotedString] = "DoubleQuotedString", + [kExprNodeOr] = "Or", + [kExprNodeAnd] = "And", + [kExprNodeUnaryMinus] = "UnaryMinus", + [kExprNodeBinaryMinus] = "BinaryMinus", + [kExprNodeNot] = 
"Not", + [kExprNodeMultiplication] = "Multiplication", + [kExprNodeDivision] = "Division", + [kExprNodeMod] = "Mod", + [kExprNodeOption] = "Option", + [kExprNodeEnvironment] = "Environment", +}; + +/// Represent `int` character as a string +/// +/// Converts +/// - ASCII digits into '{digit}' +/// - ASCII printable characters into a single-character strings +/// - everything else to numbers. +/// +/// @param[in] ch Character to convert. +/// +/// @return Converted string, stored in a static buffer (overriden after each +/// call). +static const char *intchar2str(const int ch) + FUNC_ATTR_WARN_UNUSED_RESULT +{ + static char buf[sizeof(int) * 3 + 1]; + if (' ' <= ch && ch < 0x7f) { + if (ascii_isdigit(ch)) { + buf[0] = '\''; + buf[1] = (char)ch; + buf[2] = '\''; + buf[3] = NUL; + } else { + buf[0] = (char)ch; + buf[1] = NUL; + } + } else { + snprintf(buf, sizeof(buf), "%i", ch); + } + return buf; +} + #ifdef UNIT_TESTING #include @@ -767,9 +808,9 @@ static inline void viml_pexpr_debug_print_ast_node( if (*eastnode_p == NULL) { fprintf(stderr, "%s %p : NULL\n", prefix, (void *)eastnode_p); } else { - fprintf(stderr, "%s %p : %p : %c : %zu:%zu:%zu\n", + fprintf(stderr, "%s %p : %p : %s : %zu:%zu:%zu\n", prefix, (void *)eastnode_p, (void *)(*eastnode_p), - (*eastnode_p)->type, (*eastnode_p)->start.line, + east_node_type_tab[(*eastnode_p)->type], (*eastnode_p)->start.line, (*eastnode_p)->start.col, (*eastnode_p)->len); } } @@ -800,11 +841,141 @@ static inline void viml_pexpr_debug_print_token( #define PSTACK_P(msg) \ viml_pexpr_debug_print_ast_stack(ast_stack, #msg) #define PNODE_P(eastnode_p, msg) \ - viml_pexpr_debug_print_ast_node((const ExprASTNode *const *)ast_stack, #msg) + viml_pexpr_debug_print_ast_node((const ExprASTNode *const *)eastnode_p, \ + (#msg)) #define PTOKEN(tkn) \ viml_pexpr_debug_print_token(pstate, tkn) #endif +#ifndef NDEBUG +static const uint8_t node_maxchildren[] = { + [kExprNodeMissing] = 0, + [kExprNodeOpMissing] = 2, + [kExprNodeTernary] = 2, 
+ [kExprNodeTernaryValue] = 2, + [kExprNodeRegister] = 0, + [kExprNodeSubscript] = 2, + [kExprNodeListLiteral] = 1, + [kExprNodeUnaryPlus] = 1, + [kExprNodeBinaryPlus] = 2, + [kExprNodeNested] = 1, + [kExprNodeCall] = 2, + [kExprNodePlainIdentifier] = 0, + [kExprNodePlainKey] = 0, + [kExprNodeComplexIdentifier] = 2, + [kExprNodeUnknownFigure] = 1, + [kExprNodeLambda] = 2, + [kExprNodeDictLiteral] = 1, + [kExprNodeCurlyBracesIdentifier] = 1, + [kExprNodeComma] = 2, + [kExprNodeColon] = 2, + [kExprNodeArrow] = 2, + [kExprNodeComparison] = 2, + [kExprNodeConcat] = 2, + [kExprNodeConcatOrSubscript] = 2, + [kExprNodeInteger] = 0, + [kExprNodeFloat] = 0, + [kExprNodeSingleQuotedString] = 0, + [kExprNodeDoubleQuotedString] = 0, + [kExprNodeOr] = 2, + [kExprNodeAnd] = 2, + [kExprNodeUnaryMinus] = 1, + [kExprNodeBinaryMinus] = 2, + [kExprNodeNot] = 1, + [kExprNodeMultiplication] = 2, + [kExprNodeDivision] = 2, + [kExprNodeMod] = 2, + [kExprNodeOption] = 0, + [kExprNodeEnvironment] = 0, +}; +#endif + +/// Free memory occupied by AST +/// +/// @param ast AST stack to free. +void viml_pexpr_free_ast(ExprAST ast) +{ + ExprASTStack ast_stack; + kvi_init(ast_stack); + kvi_push(ast_stack, &ast.root); + while (kv_size(ast_stack)) { + ExprASTNode **const cur_node = kv_last(ast_stack); +#ifndef NDEBUG + // Explicitly check for AST recursiveness. + for (size_t i = 0 ; i < kv_size(ast_stack) - 1 ; i++) { + assert(*kv_A(ast_stack, i) != *cur_node); + } +#endif + if (*cur_node == NULL) { + assert(kv_size(ast_stack) == 1); + kv_drop(ast_stack, 1); + } else if ((*cur_node)->children != NULL) { +#ifndef NDEBUG + const uint8_t maxchildren = node_maxchildren[(*cur_node)->type]; + assert(maxchildren > 0); + assert(maxchildren <= 2); + assert(maxchildren == 1 + ? 
(*cur_node)->children->next == NULL + : ((*cur_node)->children->next == NULL + || (*cur_node)->children->next->next == NULL)); +#endif + kvi_push(ast_stack, &(*cur_node)->children); + } else if ((*cur_node)->next != NULL) { + kvi_push(ast_stack, &(*cur_node)->next); + } else if (*cur_node != NULL) { + kv_drop(ast_stack, 1); + switch ((*cur_node)->type) { + case kExprNodeDoubleQuotedString: + case kExprNodeSingleQuotedString: { + xfree((*cur_node)->data.str.value); + break; + } + case kExprNodeMissing: + case kExprNodeOpMissing: + case kExprNodeTernary: + case kExprNodeTernaryValue: + case kExprNodeRegister: + case kExprNodeSubscript: + case kExprNodeListLiteral: + case kExprNodeUnaryPlus: + case kExprNodeBinaryPlus: + case kExprNodeNested: + case kExprNodeCall: + case kExprNodePlainIdentifier: + case kExprNodePlainKey: + case kExprNodeComplexIdentifier: + case kExprNodeUnknownFigure: + case kExprNodeLambda: + case kExprNodeDictLiteral: + case kExprNodeCurlyBracesIdentifier: + case kExprNodeComma: + case kExprNodeColon: + case kExprNodeArrow: + case kExprNodeComparison: + case kExprNodeConcat: + case kExprNodeConcatOrSubscript: + case kExprNodeInteger: + case kExprNodeFloat: + case kExprNodeOr: + case kExprNodeAnd: + case kExprNodeUnaryMinus: + case kExprNodeBinaryMinus: + case kExprNodeNot: + case kExprNodeMultiplication: + case kExprNodeDivision: + case kExprNodeMod: + case kExprNodeOption: + case kExprNodeEnvironment: { + break; + } + } + xfree(*cur_node); + *cur_node = NULL; + } + } + kvi_destroy(ast_stack); +} + // start = s ternary_expr s EOC // ternary_expr = binop_expr // ( s Question s ternary_expr s Colon s ternary_expr s )? 
-- cgit From 57bb3346d95f325d4894d41c88d3a1434de2d2df Mon Sep 17 00:00:00 2001 From: ZyX Date: Sun, 15 Oct 2017 20:43:16 +0300 Subject: viml/parser/expressions: Update some comments and add another check --- src/nvim/viml/parser/expressions.c | 104 ++++++++++++++----------------------- 1 file changed, 38 insertions(+), 66 deletions(-) (limited to 'src/nvim/viml/parser') diff --git a/src/nvim/viml/parser/expressions.c b/src/nvim/viml/parser/expressions.c index 2f7ec6bcca..370034943e 100644 --- a/src/nvim/viml/parser/expressions.c +++ b/src/nvim/viml/parser/expressions.c @@ -976,59 +976,6 @@ void viml_pexpr_free_ast(ExprAST ast) kvi_destroy(ast_stack); } -// start = s ternary_expr s EOC -// ternary_expr = binop_expr -// ( s Question s ternary_expr s Colon s ternary_expr s )? -// binop_expr = unaryop_expr ( binop unaryop_expr )? -// unaryop_expr = ( unaryop )? subscript_expr -// subscript_expr = subscript_expr subscript -// | value_expr -// subscript = Bracket('[') s ternary_expr s Bracket(']') -// | s Parenthesis('(') call_args Parenthesis(')') -// | Dot ( PlainIdentifier | Number )+ -// # Note: `s` before Parenthesis('(') is only valid if preceding subscript_expr -// # is PlainIdentifier -// value_expr = ( float | Number -// | DoubleQuotedString | SingleQuotedString -// | paren_expr -// | list_literal -// | lambda_literal -// | dict_literal -// | Environment -// | Option -// | Register -// | var ) -// float = Number Dot Number ( PlainIdentifier('e') ( Plus | Minus )? Number )? -// # Note: `1.2.3` is concat and not float. `"abc".2.3` is also concat without -// # floats. -// paren_expr = Parenthesis('(') s ternary_expr s Parenthesis(')') -// list_literal = Bracket('[') s -// ( ternary_expr s Comma s )* -// ternary_expr? s -// Bracket(']') -// dict_literal = FigureBrace('{') s -// ( ternary_expr s Colon s ternary_expr s Comma s )* -// ( ternary_expr s Colon s ternary_expr s )? 
-// FigureBrace('}') -// lambda_literal = FigureBrace('{') s -// ( PlainIdentifier s Comma s )* -// PlainIdentifier s -// Arrow s -// ternary_expr s -// FigureBrace('}') -// var = varchunk+ -// varchunk = PlainIdentifier -// | Comparison("is" | "is#" | "isnot" | "isnot#") -// | FigureBrace('{') s ternary_expr s FigureBrace('}') -// call_args = ( s ternary_expr s Comma s )* s ternary_expr? s -// binop = s ( Plus | Minus | Dot -// | Comparison -// | Multiplication -// | Or -// | And ) s -// unaryop = s ( Not | Plus | Minus ) s -// s = Spacing? -// // Binary operator precedence and associativity: // // Operator | Precedence | Associativity @@ -1885,6 +1832,14 @@ static void parse_quoted_string(ParserState *const pstate, kvi_destroy(shifts); } +/// Additional flags to pass to lexer depending on want_node +static const int want_node_to_lexer_flags[] = { + [kENodeValue] = kELFlagIsNotCmp, + [kENodeOperator] = kELFlagForbidScope, + [kENodeArgument] = kELFlagIsNotCmp, + [kENodeArgumentSeparator] = kELFlagForbidScope, +}; + /// Parse one VimL expression /// /// @param pstate Parser state. @@ -1902,26 +1857,25 @@ ExprAST viml_pexpr_parse(ParserState *const pstate, const int flags) }, .root = NULL, }; + // Expression stack contains current branch in AST tree: that is + // - Stack item 0 contains root of the tree, i.e. &ast->root. + // - Stack item i points to the previous stack items’ last child. + // + // When parser expects “value” node that is something like identifier or "[" + // (list start) last stack item contains NULL. Otherwise last stack item is + // supposed to contain last “finished” value: e.g. "1" or "+(1, 1)" (node + // representing "1+1"). + // + // Both kENodeValue and kENodeArgument stand for “value” nodes. ExprASTStack ast_stack; kvi_init(ast_stack); kvi_push(ast_stack, &ast.root); - // Expressions stack: - // 1. *last is NULL if want_node is kExprLexValue. Indicates where expression - // is to be put. - // 2. 
*last is not NULL otherwise, indicates current expression to be used as - // an operator argument. ExprASTWantedNode want_node = kENodeValue; LexExprToken prev_token = { .type = kExprLexMissing }; bool highlighted_prev_spacing = false; // Lambda node, valid when parsing lambda arguments only. ExprASTNode *lambda_node = NULL; do { - const int want_node_to_lexer_flags[] = { - [kENodeValue] = kELFlagIsNotCmp, - [kENodeOperator] = kELFlagForbidScope, - [kENodeArgument] = kELFlagIsNotCmp, - [kENodeArgumentSeparator] = kELFlagForbidScope, - }; const bool is_concat_or_subscript = ( want_node == kENodeValue && kv_size(ast_stack) > 1 @@ -1965,9 +1919,27 @@ viml_pexpr_parse_process_token: } const ParserLine pline = pstate->reader.lines.items[cur_token.start.line]; ExprASTNode **const top_node_p = kv_last(ast_stack); + assert(kv_size(ast_stack) >= 1); ExprASTNode *cur_node = NULL; - assert((want_node == kENodeValue || want_node == kENodeArgument) - == (*top_node_p == NULL)); +#ifndef NDEBUG + const bool want_value = ( + want_node == kENodeValue || want_node == kENodeArgument); + assert(want_value == (*top_node_p == NULL)); + assert(kv_A(ast_stack, 0) == &ast.root); + // Check that stack item i + 1 points to stack items’ i *last* child. + for (size_t i = 0; i + 1 < kv_size(ast_stack); i++) { + const bool item_null = (want_value && i + 2 == kv_size(ast_stack)); + assert((&(*kv_A(ast_stack, i))->children == kv_A(ast_stack, i + 1) + && (item_null + ? (*kv_A(ast_stack, i))->children == NULL + : (*kv_A(ast_stack, i))->children->next == NULL)) + || ((&(*kv_A(ast_stack, i))->children->next + == kv_A(ast_stack, i + 1)) + && (item_null + ? 
(*kv_A(ast_stack, i))->children->next == NULL + : (*kv_A(ast_stack, i))->children->next->next == NULL))); + } +#endif // Note: in Vim whether expression "cond?d.a:2" is valid depends both on // "cond" and whether "d" is a dictionary: expression is valid if condition // is true and "d" is a dictionary (with "a" key or it will complain about -- cgit From 3aa2c0d63ae488e302a89fdcdd650404cb2670fd Mon Sep 17 00:00:00 2001 From: ZyX Date: Sun, 15 Oct 2017 21:11:00 +0300 Subject: viml/parser/expressions,klee: Fix some problems found by KLEE run --- src/nvim/viml/parser/expressions.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/nvim/viml/parser') diff --git a/src/nvim/viml/parser/expressions.h b/src/nvim/viml/parser/expressions.h index 0198852bed..025f0f766e 100644 --- a/src/nvim/viml/parser/expressions.h +++ b/src/nvim/viml/parser/expressions.h @@ -69,7 +69,7 @@ typedef enum { } ExprOptScope; #define EXPR_OPT_SCOPE_LIST \ - ((char *)(char[]){ kExprOptScopeGlobal, kExprOptScopeLocal }) + ((char[]){ kExprOptScopeGlobal, kExprOptScopeLocal }) /// All possible variable scopes typedef enum { -- cgit From 2cb95bd9378d3c45f2eea527683546825e16f7d3 Mon Sep 17 00:00:00 2001 From: ZyX Date: Sun, 15 Oct 2017 21:39:01 +0300 Subject: viml/parser/expressions: Define east_node_type_tab only when needed --- src/nvim/viml/parser/expressions.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/nvim/viml/parser') diff --git a/src/nvim/viml/parser/expressions.c b/src/nvim/viml/parser/expressions.c index 370034943e..4801d66988 100644 --- a/src/nvim/viml/parser/expressions.c +++ b/src/nvim/viml/parser/expressions.c @@ -725,6 +725,7 @@ viml_pexpr_repr_token_end: return ret; } +#ifdef UNIT_TESTING static const char *const east_node_type_tab[] = { [kExprNodeMissing] = "Missing", [kExprNodeOpMissing] = "OpMissing", @@ -765,6 +766,7 @@ static const char *const east_node_type_tab[] = { [kExprNodeOption] = "Option", [kExprNodeEnvironment] = "Environment", }; +#endif /// 
Represent `int` character as a string /// -- cgit From fe81380bf5d4d161187998088aa9cff948b7c891 Mon Sep 17 00:00:00 2001 From: ZyX Date: Mon, 16 Oct 2017 00:30:55 +0300 Subject: viml/parser/expressions: Highlight prefix separately from number Should make accidental octals more visible. --- src/nvim/viml/parser/expressions.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'src/nvim/viml/parser') diff --git a/src/nvim/viml/parser/expressions.c b/src/nvim/viml/parser/expressions.c index 4801d66988..35f4385f33 100644 --- a/src/nvim/viml/parser/expressions.c +++ b/src/nvim/viml/parser/expressions.c @@ -1042,6 +1042,7 @@ void viml_pexpr_free_ast(ExprAST ast) // // NVimRegister -> SpecialChar // NVimNumber -> Number +// NVimNumberPrefix -> SpecialChar // NVimFloat -> NVimNumber // // NVimNestingParenthesis -> NVimParenthesis @@ -1842,6 +1843,14 @@ static const int want_node_to_lexer_flags[] = { [kENodeArgumentSeparator] = kELFlagForbidScope, }; +/// Number of characters to highlight as NumberPrefix depending on the base +static const uint8_t base_to_prefix_length[] = { + [2] = 2, + [8] = 1, + [10] = 0, + [16] = 2, +}; + /// Parse one VimL expression /// /// @param pstate Parser state. 
@@ -2632,7 +2641,13 @@ viml_pexpr_parse_figure_brace_closing_error: } else { NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeInteger); cur_node->data.num.value = cur_token.data.num.val.integer; - HL_CUR_TOKEN(Number); + const uint8_t prefix_length = base_to_prefix_length[ + cur_token.data.num.base]; + viml_parser_highlight(pstate, cur_token.start, prefix_length, + HL(NumberPrefix)); + viml_parser_highlight( + pstate, shifted_pos(cur_token.start, prefix_length), + cur_token.len - prefix_length, HL(Number)); } want_node = kENodeOperator; *top_node_p = cur_node; -- cgit From ed253b5fe6515840fe6dd9df83855a0316de8bad Mon Sep 17 00:00:00 2001 From: ZyX Date: Mon, 16 Oct 2017 00:39:48 +0300 Subject: klee: Include colors in test --- src/nvim/viml/parser/parser.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/nvim/viml/parser') diff --git a/src/nvim/viml/parser/parser.h b/src/nvim/viml/parser/parser.h index 10ced57977..fbc5ba5f07 100644 --- a/src/nvim/viml/parser/parser.h +++ b/src/nvim/viml/parser/parser.h @@ -176,8 +176,9 @@ static inline void viml_parser_highlight(ParserState *const pstate, if (pstate->colors == NULL || len == 0) { return; } - // TODO(ZyX-I): May do some assert() sanitizing here. - // TODO(ZyX-I): May join chunks. 
+ assert(kv_size(*pstate->colors) == 0 + || kv_Z(*pstate->colors, 0).start.line < start.line + || kv_Z(*pstate->colors, 0).end_col <= start.col); kvi_push(*pstate->colors, ((ParserHighlightChunk) { .start = start, .end_col = start.col + len, -- cgit From 4c8ed65b608df06b4c72b641f4ecc86985295633 Mon Sep 17 00:00:00 2001 From: ZyX Date: Mon, 16 Oct 2017 03:04:22 +0300 Subject: viml/parser/expressions: Fix memory leak when processing ternary --- src/nvim/viml/parser/expressions.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/nvim/viml/parser') diff --git a/src/nvim/viml/parser/expressions.c b/src/nvim/viml/parser/expressions.c index 35f4385f33..876fbc8d37 100644 --- a/src/nvim/viml/parser/expressions.c +++ b/src/nvim/viml/parser/expressions.c @@ -2308,10 +2308,10 @@ viml_pexpr_parse_invalid_colon: _("E15: Colon outside of dictionary or ternary operator: %.*s")); viml_pexpr_parse_valid_colon: ADD_VALUE_IF_MISSING(_(EXP_VAL_COLON)); - NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeColon); if (is_ternary) { HL_CUR_TOKEN(TernaryColon); } else { + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeColon); ADD_OP_NODE(cur_node); HL_CUR_TOKEN(Colon); } -- cgit From c9f511d24a64da135bef4b9874c7bec04d9330e4 Mon Sep 17 00:00:00 2001 From: ZyX Date: Mon, 16 Oct 2017 09:06:05 +0300 Subject: viml/parser/expressions: Remove unused flag --- src/nvim/viml/parser/expressions.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'src/nvim/viml/parser') diff --git a/src/nvim/viml/parser/expressions.h b/src/nvim/viml/parser/expressions.h index 025f0f766e..d783518b3a 100644 --- a/src/nvim/viml/parser/expressions.h +++ b/src/nvim/viml/parser/expressions.h @@ -314,10 +314,6 @@ enum { /// When parsing expressions input by user bar is assumed to be a binary /// operator and other two are spacings. kExprFlagsDisallowEOC = (1 << 1), - /// Print errors when encountered - /// - /// Without the flag they are only taken into account when parsing. 
- kExprFlagsPrintError = (1 << 2), // WARNING: whenever you add a new flag, alter klee_assume() statement in // viml_expressions_parser.c. } ExprParserFlags; -- cgit From 895793fc820e04ea2d6bdaa90c6643c4dce2f0e7 Mon Sep 17 00:00:00 2001 From: ZyX Date: Mon, 16 Oct 2017 09:14:02 +0300 Subject: viml/parser/expressions: Add some casts --- src/nvim/viml/parser/expressions.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/nvim/viml/parser') diff --git a/src/nvim/viml/parser/expressions.c b/src/nvim/viml/parser/expressions.c index 876fbc8d37..69817bf24f 100644 --- a/src/nvim/viml/parser/expressions.c +++ b/src/nvim/viml/parser/expressions.c @@ -698,8 +698,8 @@ const char *viml_pexpr_repr_token(const ParserState *const pstate, (int)token.data.num.is_float, (int)token.data.num.base, (double)(token.data.num.is_float - ? token.data.num.val.floating - : token.data.num.val.integer)) + ? (double)token.data.num.val.floating + : (double)token.data.num.val.integer)) TKNARGS(kExprLexInvalid, "(msg=%s)", token.data.err.msg) default: { // No additional arguments. -- cgit From 47938e1e22816381f26e8882eacd6e7e8baf37fd Mon Sep 17 00:00:00 2001 From: ZyX Date: Thu, 19 Oct 2017 10:48:05 +0300 Subject: viml/parser/expressions: Fix some errors spotted by KLEE Not all of them are fixed yet though. 
--- src/nvim/viml/parser/expressions.c | 58 +++++++++++++++++++++++++------------- 1 file changed, 39 insertions(+), 19 deletions(-) (limited to 'src/nvim/viml/parser') diff --git a/src/nvim/viml/parser/expressions.c b/src/nvim/viml/parser/expressions.c index 69817bf24f..da22cf4cdb 100644 --- a/src/nvim/viml/parser/expressions.c +++ b/src/nvim/viml/parser/expressions.c @@ -1091,7 +1091,7 @@ void viml_pexpr_free_ast(ExprAST ast) // NVimInvalidDoubleQuote -> NVimInvalidString // NVimInvalidDoubleQuotedBody -> NVimInvalidString // NVimInvalidDoubleQuotedEscape -> NVimInvalidStringSpecial -// NVimInvalidDoubleQuotedUnknownEscape -> NVimInvalid +// NVimInvalidDoubleQuotedUnknownEscape -> NVimInvalidDoubleQuotedEscape // // NVimFigureBrace -> NVimInternalError // NVimInvalidSingleQuotedUnknownEscape -> NVimInternalError @@ -1313,7 +1313,7 @@ static bool viml_pexpr_handle_bop(const ParserState *const pstate, /// ParserPosition literal based on ParserPosition pos with columns shifted /// -/// Function does not check whether remaining position is valid. +/// Function does not check whether resulting position is valid. /// /// @param[in] pos Position to shift. /// @param[in] shift Number of bytes to shift. @@ -1326,6 +1326,21 @@ static inline ParserPosition shifted_pos(const ParserPosition pos, return (ParserPosition) { .line = pos.line, .col = pos.col + shift }; } +/// ParserPosition literal based on ParserPosition pos with specified column +/// +/// Function does not check whether remaining position is valid. +/// +/// @param[in] pos Position to adjust. +/// @param[in] new_col New column. +/// +/// @return Shifted position. +static inline ParserPosition recol_pos(const ParserPosition pos, + const size_t new_col) + FUNC_ATTR_CONST FUNC_ATTR_ALWAYS_INLINE FUNC_ATTR_WARN_UNUSED_RESULT +{ + return (ParserPosition) { .line = pos.line, .col = new_col }; +} + /// Get highlight group name #define HL(g) (is_invalid ? 
"NVimInvalid" #g : "NVim" #g) @@ -1639,7 +1654,7 @@ static void parse_quoted_string(ParserState *const pstate, size_t n = (*p == 'u' ? 4 : 8); int nr = 0; p++; - while (n-- && ascii_isxdigit(p[1])) { + while (p + 1 < e && n-- && ascii_isxdigit(p[1])) { p++; nr = (nr << 4) + hex2nr(*p); } @@ -1659,7 +1674,7 @@ static void parse_quoted_string(ParserState *const pstate, if (*p >= '0' && *p <= '7') { size--; p++; - if (*p >= '0' && *p <= '7') { + if (p < e && *p >= '0' && *p <= '7') { size--; p++; } @@ -1715,7 +1730,7 @@ static void parse_quoted_string(ParserState *const pstate, // Hexadecimal or unicode. case 'X': case 'x': case 'u': case 'U': { - if (ascii_isxdigit(p[1])) { + if (p + 1 < e && ascii_isxdigit(p[1])) { size_t n; int nr; bool is_hex = (*p == 'x' || *p == 'X'); @@ -1728,7 +1743,7 @@ static void parse_quoted_string(ParserState *const pstate, n = 8; } nr = 0; - while (n-- && ascii_isxdigit(p[1])) { + while (p + 1 < e && n-- && ascii_isxdigit(p[1])) { p++; nr = (nr << 4) + hex2nr(*p); } @@ -1749,9 +1764,9 @@ static void parse_quoted_string(ParserState *const pstate, case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': { uint8_t ch = (uint8_t)(*p++ - '0'); - if (*p >= '0' && *p <= '7') { + if (p < e && *p >= '0' && *p <= '7') { ch = (uint8_t)((ch << 3) + *p++ - '0'); - if (*p >= '0' && *p <= '7') { + if (p < e && *p >= '0' && *p <= '7') { ch = (uint8_t)((ch << 3) + *p++ - '0'); } } @@ -1793,7 +1808,7 @@ static void parse_quoted_string(ParserState *const pstate, // TODO(ZyX-I): use ast_stack to determine and highlight regular expressions // TODO(ZyX-I): use ast_stack to determine and highlight printf format str // TODO(ZyX-I): use ast_stack to determine and highlight expression strings - size_t next_col = 1; + size_t next_col = token.start.col + 1; const char *const body_str = (is_double ? 
HL(DoubleQuotedBody) : HL(SingleQuotedBody)); @@ -1806,20 +1821,23 @@ static void parse_quoted_string(ParserState *const pstate, for (size_t i = 0; i < kv_size(shifts); i++) { const StringShift cur_shift = kv_A(shifts, i); if (cur_shift.start > next_col) { - viml_parser_highlight(pstate, shifted_pos(token.start, next_col), + viml_parser_highlight(pstate, recol_pos(token.start, next_col), cur_shift.start - next_col, body_str); } - viml_parser_highlight(pstate, shifted_pos(token.start, cur_shift.start), + viml_parser_highlight(pstate, recol_pos(token.start, cur_shift.start), cur_shift.orig_len, (cur_shift.escape_not_known ? ukn_esc_str : esc_str)); next_col = cur_shift.start + cur_shift.orig_len; } - if (next_col < token.len - token.data.str.closed) { - viml_parser_highlight(pstate, shifted_pos(token.start, next_col), - token.len - token.data.str.closed - next_col, + if (next_col - token.start.col < token.len - token.data.str.closed) { + viml_parser_highlight(pstate, recol_pos(token.start, next_col), + (token.start.col + + token.len + - token.data.str.closed + - next_col), body_str); } } @@ -2580,6 +2598,9 @@ viml_pexpr_parse_figure_brace_closing_error: break; } case kExprLexPlainIdentifier: { + const ExprVarScope scope = (cur_token.type == kExprLexInvalid + ? kExprVarScopeMissing + : cur_token.data.var.scope); if (want_node == kENodeValue || want_node == kENodeArgument) { want_node = (want_node == kENodeArgument ? kENodeArgumentSeparator @@ -2588,9 +2609,8 @@ viml_pexpr_parse_figure_brace_closing_error: (node_is_key ? kExprNodePlainKey : kExprNodePlainIdentifier)); - cur_node->data.var.scope = cur_token.data.var.scope; - const size_t scope_shift = ( - cur_token.data.var.scope == kExprVarScopeMissing ? 0 : 2); + cur_node->data.var.scope = scope; + const size_t scope_shift = (scope == kExprVarScopeMissing ? 
0 : 2); cur_node->data.var.ident = (pline.data + cur_token.start.col + scope_shift); cur_node->data.var.ident_len = cur_token.len - scope_shift; @@ -2609,11 +2629,11 @@ viml_pexpr_parse_figure_brace_closing_error: ? HL(IdentifierKey) : HL(Identifier))); } else { - if (cur_token.data.var.scope == kExprVarScopeMissing) { + if (scope == kExprVarScopeMissing) { ADD_IDENT( do { NEW_NODE_WITH_CUR_POS(cur_node, kExprNodePlainIdentifier); - cur_node->data.var.scope = cur_token.data.var.scope; + cur_node->data.var.scope = scope; cur_node->data.var.ident = pline.data + cur_token.start.col; cur_node->data.var.ident_len = cur_token.len; want_node = kENodeOperator; -- cgit From 568cf73c90af2966ee091f2180905a8cf9582064 Mon Sep 17 00:00:00 2001 From: ZyX Date: Sun, 29 Oct 2017 01:29:48 +0300 Subject: viml/parser/expressions: Fix last error found by KLEE --- src/nvim/viml/parser/expressions.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/nvim/viml/parser') diff --git a/src/nvim/viml/parser/expressions.c b/src/nvim/viml/parser/expressions.c index da22cf4cdb..b413d56592 100644 --- a/src/nvim/viml/parser/expressions.c +++ b/src/nvim/viml/parser/expressions.c @@ -2445,6 +2445,7 @@ viml_pexpr_parse_bracket_closing_error: cur_node->children = *top_node_p; } *top_node_p = cur_node; + new_top_node_p = top_node_p; goto viml_pexpr_parse_figure_brace_closing_error; } if (want_node == kENodeValue) { -- cgit From b935a12dab17c3887db9c5fd7c90b34b2c51170f Mon Sep 17 00:00:00 2001 From: ZyX Date: Sun, 29 Oct 2017 16:32:13 +0300 Subject: ex_getln: Make use of new parser to color expressions Retires g:Nvim_color_expr callback. 
--- src/nvim/viml/parser/parser.c | 13 ++++++++++ src/nvim/viml/parser/parser.h | 55 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+) create mode 100644 src/nvim/viml/parser/parser.c (limited to 'src/nvim/viml/parser') diff --git a/src/nvim/viml/parser/parser.c b/src/nvim/viml/parser/parser.c new file mode 100644 index 0000000000..08d8846018 --- /dev/null +++ b/src/nvim/viml/parser/parser.c @@ -0,0 +1,13 @@ +#include "nvim/viml/parser/parser.h" + +#ifdef INCLUDE_GENERATED_DECLARATIONS +# include "viml/parser/parser.c.generated.h" +#endif + + +void parser_simple_get_line(void *cookie, ParserLine *ret_pline) +{ + ParserLine **plines_p = (ParserLine **)cookie; + *ret_pline = **plines_p; + (*plines_p)++; +} diff --git a/src/nvim/viml/parser/parser.h b/src/nvim/viml/parser/parser.h index fbc5ba5f07..7ac49709d8 100644 --- a/src/nvim/viml/parser/parser.h +++ b/src/nvim/viml/parser/parser.h @@ -8,6 +8,7 @@ #include "nvim/lib/kvec.h" #include "nvim/func_attr.h" #include "nvim/mbyte.h" +#include "nvim/memory.h" /// One parsed line typedef struct { @@ -80,6 +81,56 @@ typedef struct { bool can_continuate; } ParserState; +static inline void viml_parser_init( + ParserState *const ret_pstate, + const ParserLineGetter get_line, void *const cookie, + ParserHighlight *const colors) + REAL_FATTR_ALWAYS_INLINE REAL_FATTR_NONNULL_ARG(1, 2); + +/// Initialize a new parser state instance +/// +/// @param[out] ret_pstate Parser state to initialize. +/// @param[in] get_line Line getter function. +/// @param[in] cookie Argument for the get_line function. +/// @param[in] colors Where to save highlighting. May be NULL if it is not +/// needed. 
+static inline void viml_parser_init( + ParserState *const ret_pstate, + const ParserLineGetter get_line, void *const cookie, + ParserHighlight *const colors) +{ + *ret_pstate = (ParserState) { + .reader = { + .get_line = get_line, + .cookie = cookie, + .conv = MBYTE_NONE_CONV, + }, + .pos = { 0, 0 }, + .colors = colors, + .can_continuate = false, + }; + kvi_init(ret_pstate->reader.lines); + kvi_init(ret_pstate->stack); +} + +static inline void viml_parser_destroy(ParserState *const pstate) + REAL_FATTR_NONNULL_ALL REAL_FATTR_ALWAYS_INLINE; + +/// Free all memory allocated by the parser on heap +/// +/// @param pstate Parser state to free. +static inline void viml_parser_destroy(ParserState *const pstate) +{ + for (size_t i = 0; i < kv_size(pstate->reader.lines); i++) { + ParserLine pline = kv_A(pstate->reader.lines, i); + if (pline.allocated) { + xfree((void *)pline.data); + } + } + kvi_destroy(pstate->reader.lines); + kvi_destroy(pstate->stack); +} + static inline void viml_preader_get_line(ParserInputReader *const preader, ParserLine *const ret_pline) REAL_FATTR_NONNULL_ALL; @@ -186,4 +237,8 @@ static inline void viml_parser_highlight(ParserState *const pstate, })); } +#ifdef INCLUDE_GENERATED_DECLARATIONS +# include "viml/parser/parser.h.generated.h" +#endif + #endif // NVIM_VIML_PARSER_PARSER_H -- cgit From 22d161a5dd1c519f998916f45d61be92662fbb44 Mon Sep 17 00:00:00 2001 From: ZyX Date: Sun, 29 Oct 2017 20:11:44 +0300 Subject: api/vim: Add nvim_parse_expression function --- src/nvim/viml/parser/expressions.c | 4 +--- src/nvim/viml/parser/expressions.h | 3 +++ 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'src/nvim/viml/parser') diff --git a/src/nvim/viml/parser/expressions.c b/src/nvim/viml/parser/expressions.c index b413d56592..4e8a9b8523 100644 --- a/src/nvim/viml/parser/expressions.c +++ b/src/nvim/viml/parser/expressions.c @@ -849,8 +849,7 @@ static inline void viml_pexpr_debug_print_token( viml_pexpr_debug_print_token(pstate, tkn) #endif 
-#ifndef NDEBUG -static const uint8_t node_maxchildren[] = { +const uint8_t node_maxchildren[] = { [kExprNodeMissing] = 0, [kExprNodeOpMissing] = 2, [kExprNodeTernary] = 2, @@ -890,7 +889,6 @@ static const uint8_t node_maxchildren[] = { [kExprNodeOption] = 0, [kExprNodeEnvironment] = 0, }; -#endif /// Free memory occupied by AST /// diff --git a/src/nvim/viml/parser/expressions.h b/src/nvim/viml/parser/expressions.h index d783518b3a..d00d4855f3 100644 --- a/src/nvim/viml/parser/expressions.h +++ b/src/nvim/viml/parser/expressions.h @@ -338,6 +338,9 @@ typedef struct { ExprASTNode *root; } ExprAST; +/// Array mapping ExprASTNodeType to maximum amount of children node may have +extern const uint8_t node_maxchildren[]; + #ifdef INCLUDE_GENERATED_DECLARATIONS # include "viml/parser/expressions.h.generated.h" #endif -- cgit From 748f3ad5bbf9706dddddeea5df693221d6ae5e94 Mon Sep 17 00:00:00 2001 From: ZyX Date: Sun, 29 Oct 2017 21:30:06 +0300 Subject: syntax,viml/expressions/parser: Create defaults for expr highlighting --- src/nvim/viml/parser/expressions.c | 115 +++---------------------------------- 1 file changed, 7 insertions(+), 108 deletions(-) (limited to 'src/nvim/viml/parser') diff --git a/src/nvim/viml/parser/expressions.c b/src/nvim/viml/parser/expressions.c index 4e8a9b8523..615a59573f 100644 --- a/src/nvim/viml/parser/expressions.c +++ b/src/nvim/viml/parser/expressions.c @@ -993,107 +993,6 @@ void viml_pexpr_free_ast(ExprAST ast) // > ># >? <= <=# <=? // < <# = >=# >=? // is is# is? isnot isnot# isnot? 
-// -// Used highlighting groups and assumed linkage: -// -// NVimInternalError -> highlight as fg:red/bg:red -// -// NVimInvalid -> Error -// NVimInvalidValue -> NVimInvalid -// NVimInvalidOperator -> NVimInvalid -// NVimInvalidDelimiter -> NVimInvalid -// -// NVimOperator -> Operator -// NVimUnaryOperator -> NVimOperator -// NVimBinaryOperator -> NVimOperator -// -// NVimComparisonOperator -> NVimBinaryOperator -// NVimComparisonOperatorModifier -> NVimComparisonOperator -// -// NVimTernary -> NVimOperator -// NVimTernaryColon -> NVimTernary -// -// NVimParenthesis -> Delimiter -// -// NVimColon -> Delimiter -// NVimComma -> Delimiter -// NVimArrow -> Delimiter -// -// NVimLambda -> Delimiter -// NVimDict -> Delimiter -// NVimCurly -> Delimiter -// -// NVimList -> Delimiter -// NVimSubscript -> Delimiter -// NVimSubscriptColon -> NVimSubscript -// -// NVimIdentifier -> Identifier -// NVimIdentifierScope -> NVimIdentifier -// NVimIdentifierScopeDelimiter -> NVimIdentifier -// -// NVimIdentifierKey -> Identifier -// -// NVimUnaryPlus -> NVimUnaryOperator -// NVimBinaryPlus -> NVimBinaryOperator -// NVimConcat -> NVimBinaryOperator -// NVimConcatOrSubscript -> NVimConcat -// -// NVimRegister -> SpecialChar -// NVimNumber -> Number -// NVimNumberPrefix -> SpecialChar -// NVimFloat -> NVimNumber -// -// NVimNestingParenthesis -> NVimParenthesis -// NVimCallingParenthesis -> NVimParenthesis -// -// NVimString -> String -// NVimStringSpecial -> SpecialChar -// NVimSingleQuote -> NVimString -// NVimSingleQuotedBody -> NVimString -// NVimSingleQuotedQuote -> NVimStringSpecial -// NVimDoubleQuote -> NVimString -// NVimDoubleQuotedBody -> NVimString -// NVimDoubleQuotedEscape -> NVimStringSpecial -// NVimDoubleQuotedUnknownEscape -> NVimInvalid -// -// " Note: NVimDoubleQuotedUnknownEscape is not actually invalid -// -// NVimInvalidComma -> NVimInvalidDelimiter -// NVimInvalidSpacing -> NVimInvalid -// NVimInvalidTernary -> NVimInvalidOperator -// NVimInvalidTernaryColon -> 
NVimInvalidTernary -// NVimInvalidRegister -> NVimInvalidValue -// NVimInvalidClosingBracket -> NVimInvalidDelimiter -// NVimInvalidSpacing -> NVimInvalid -// NVimInvalidArrow -> NVimInvalidDelimiter -// NVimInvalidLambda -> NVimInvalidDelimiter -// NVimInvalidDict -> NVimInvalidDelimiter -// NVimInvalidCurly -> NVimInvalidDelimiter -// NVimInvalidFigureBrace -> NVimInvalidDelimiter -// NVimInvalidIdentifier -> NVimInvalidValue -// NVimInvalidIdentifierScope -> NVimInvalidValue -// NVimInvalidIdentifierScopeDelimiter -> NVimInvalidValue -// NVimInvalidComparisonOperator -> NVimInvalidOperator -// NVimInvalidComparisonOperatorModifier -> NVimInvalidComparisonOperator -// NVimInvalidNumber -> NVimInvalidValue -// NVimInvalidFloat -> NVimInvalidValue -// NVimInvalidIdentifierKey -> NVimInvalidIdentifier -// NVimInvalidList -> NVimInvalidDelimiter -// NVimInvalidSubscript -> NVimInvalidDelimiter -// NVimInvalidSubscriptColon -> NVimInvalidSubscript -// NVimInvalidString -> NVimInvalidValue -// NVimInvalidStringSpecial -> NVimInvalidString -// NVimInvalidSingleQuote -> NVimInvalidString -// NVimInvalidSingleQuotedBody -> NVimInvalidString -// NVimInvalidSingleQuotedQuote -> NVimInvalidStringSpecial -// NVimInvalidDoubleQuote -> NVimInvalidString -// NVimInvalidDoubleQuotedBody -> NVimInvalidString -// NVimInvalidDoubleQuotedEscape -> NVimInvalidStringSpecial -// NVimInvalidDoubleQuotedUnknownEscape -> NVimInvalidDoubleQuotedEscape -// -// NVimFigureBrace -> NVimInternalError -// NVimInvalidSingleQuotedUnknownEscape -> NVimInternalError -// NVimSingleQuotedUnknownEscape -> NVimInternalError /// Allocate a new node and set some of the values /// @@ -2183,12 +2082,12 @@ viml_pexpr_parse_process_token: ADD_OP_NODE(cur_node); if (cur_token.data.cmp.ccs != kCCStrategyUseOption) { viml_parser_highlight(pstate, cur_token.start, cur_token.len - 1, - HL(ComparisonOperator)); + HL(Comparison)); viml_parser_highlight( pstate, shifted_pos(cur_token.start, cur_token.len - 1), 1, - 
HL(ComparisonOperatorModifier)); + HL(ComparisonModifier)); } else { - HL_CUR_TOKEN(ComparisonOperator); + HL_CUR_TOKEN(Comparison); } want_node = kENodeValue; break; @@ -2390,7 +2289,7 @@ viml_pexpr_parse_valid_colon: break; } case kExprNodeSubscript: { - HL_CUR_TOKEN(Subscript); + HL_CUR_TOKEN(SubscriptBracket); break; } default: { @@ -2418,7 +2317,7 @@ viml_pexpr_parse_bracket_closing_error: } NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeSubscript); ADD_OP_NODE(cur_node); - HL_CUR_TOKEN(Subscript); + HL_CUR_TOKEN(SubscriptBracket); } } break; @@ -2626,7 +2525,7 @@ viml_pexpr_parse_figure_brace_closing_error: cur_token.len - scope_shift, (node_is_key ? HL(IdentifierKey) - : HL(Identifier))); + : HL(IdentifierName))); } else { if (scope == kExprVarScopeMissing) { ADD_IDENT( @@ -2637,7 +2536,7 @@ viml_pexpr_parse_figure_brace_closing_error: cur_node->data.var.ident_len = cur_token.len; want_node = kENodeOperator; } while (0), - Identifier); + IdentifierName); } else { OP_MISSING; } -- cgit From 538af1c90a4ac9928f60e97338869e516def4956 Mon Sep 17 00:00:00 2001 From: ZyX Date: Sun, 29 Oct 2017 22:02:19 +0300 Subject: syntax,viml/parser/expressions: Add missing highlight groups Also adjusts some names. 
--- src/nvim/viml/parser/expressions.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'src/nvim/viml/parser') diff --git a/src/nvim/viml/parser/expressions.c b/src/nvim/viml/parser/expressions.c index 615a59573f..f5bc547d54 100644 --- a/src/nvim/viml/parser/expressions.c +++ b/src/nvim/viml/parser/expressions.c @@ -1472,7 +1472,7 @@ static void parse_quoted_string(ParserState *const pstate, kvec_withinit_t(StringShift, 16) shifts; kvi_init(shifts); if (!is_double) { - viml_parser_highlight(pstate, token.start, 1, HL(SingleQuotedString)); + viml_parser_highlight(pstate, token.start, 1, HL(SingleQuote)); while (p < e) { const char *const chunk_e = memchr(p, '\'', (size_t)(e - p)); if (chunk_e == NULL) { @@ -1509,7 +1509,7 @@ static void parse_quoted_string(ParserState *const pstate, } } } else { - viml_parser_highlight(pstate, token.start, 1, HL(DoubleQuotedString)); + viml_parser_highlight(pstate, token.start, 1, HL(DoubleQuote)); for (p = s + 1; p < e; p++) { if (*p == '\\' && p + 1 < e) { p++; @@ -1741,10 +1741,10 @@ static void parse_quoted_string(ParserState *const pstate, if (token.data.str.closed) { if (is_double) { viml_parser_highlight(pstate, shifted_pos(token.start, token.len - 1), - 1, HL(DoubleQuotedString)); + 1, HL(DoubleQuote)); } else { viml_parser_highlight(pstate, shifted_pos(token.start, token.len - 1), - 1, HL(SingleQuotedString)); + 1, HL(SingleQuote)); } } kvi_destroy(shifts); @@ -2035,7 +2035,7 @@ viml_pexpr_parse_process_token: } viml_parser_highlight( pstate, shifted_pos(cur_token.start, scope_shift + 1), - cur_token.len - (scope_shift + 1), HL(Option)); + cur_token.len - (scope_shift + 1), HL(OptionName)); break; } case kExprLexEnv: { @@ -2053,7 +2053,7 @@ viml_pexpr_parse_process_token: want_node = kENodeOperator; viml_parser_highlight(pstate, cur_token.start, 1, HL(EnvironmentSigil)); viml_parser_highlight(pstate, shifted_pos(cur_token.start, 1), - cur_token.len - 1, HL(Environment)); + cur_token.len - 1, 
HL(EnvironmentName)); break; } case kExprLexNot: { -- cgit From a9b203d23fcc6ba8c4e298b8e082db990e1ec04f Mon Sep 17 00:00:00 2001 From: ZyX Date: Mon, 30 Oct 2017 01:32:10 +0300 Subject: *: Fix linter errors Big function in expressions.c may be refactored, if I ever catch the idea how to split it right. --- src/nvim/viml/parser/expressions.c | 8 ++++---- src/nvim/viml/parser/expressions.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'src/nvim/viml/parser') diff --git a/src/nvim/viml/parser/expressions.c b/src/nvim/viml/parser/expressions.c index f5bc547d54..fc184f56f5 100644 --- a/src/nvim/viml/parser/expressions.c +++ b/src/nvim/viml/parser/expressions.c @@ -1814,8 +1814,8 @@ ExprAST viml_pexpr_parse(ParserState *const pstate, const int flags) || ((*kv_Z(ast_stack, 1))->type != kExprNodeConcat && ((*kv_Z(ast_stack, 1))->type != kExprNodeConcatOrSubscript)))) - ? kELFlagAllowFloat - : 0)); + ? kELFlagAllowFloat + : 0)); LexExprToken cur_token = viml_pexpr_next_token( pstate, want_node_to_lexer_flags[want_node] | lexer_additional_flags); if (cur_token.type == kExprLexEOC) { @@ -1876,7 +1876,7 @@ viml_pexpr_parse_process_token: // time. // // Here example will always contain a concat with "a:2" sucking colon, - // making expression invalid both because there is no longer a spare colon + // making expression invalid both because there is no longer a spare colon // for ternary and because concatenating dictionary with anything is not // valid. There are more cases when this will make a difference though. const bool node_is_key = ( @@ -2853,7 +2853,7 @@ viml_pexpr_parse_end: } kvi_destroy(ast_stack); return ast; -} +} // NOLINT(readability/fn_size) #undef NEW_NODE #undef HL diff --git a/src/nvim/viml/parser/expressions.h b/src/nvim/viml/parser/expressions.h index d00d4855f3..668c2a4c84 100644 --- a/src/nvim/viml/parser/expressions.h +++ b/src/nvim/viml/parser/expressions.h @@ -189,7 +189,7 @@ typedef enum { kExprNodeCall, ///< Function call. 
/// Plain identifier: simple variable/function name /// - /// Looks like "string", "g:Foo", etc: consists from a single + /// Looks like "string", "g:Foo", etc: consists from a single /// kExprLexPlainIdentifier token. kExprNodePlainIdentifier, /// Plain dictionary key, for use with kExprNodeConcatOrSubscript -- cgit From 07ec709141886c6db4f944665e07a36ef7302eb4 Mon Sep 17 00:00:00 2001 From: ZyX Date: Sun, 5 Nov 2017 01:33:44 +0300 Subject: vim/api: Actually dump AST, fix some bugs in nvim_parse_expression --- src/nvim/viml/parser/expressions.c | 10 ++++------ src/nvim/viml/parser/expressions.h | 9 +++++++++ 2 files changed, 13 insertions(+), 6 deletions(-) (limited to 'src/nvim/viml/parser') diff --git a/src/nvim/viml/parser/expressions.c b/src/nvim/viml/parser/expressions.c index fc184f56f5..b19aab22af 100644 --- a/src/nvim/viml/parser/expressions.c +++ b/src/nvim/viml/parser/expressions.c @@ -616,7 +616,7 @@ static const char *const eltkn_type_tab[] = { [kExprLexArrow] = "Arrow", }; -static const char *const eltkn_cmp_type_tab[] = { +const char *const eltkn_cmp_type_tab[] = { [kExprCmpEqual] = "Equal", [kExprCmpMatches] = "Matches", [kExprCmpGreater] = "Greater", @@ -624,7 +624,7 @@ static const char *const eltkn_cmp_type_tab[] = { [kExprCmpIdentical] = "Identical", }; -static const char *const ccs_tab[] = { +const char *const ccs_tab[] = { [kCCStrategyUseOption] = "UseOption", [kCCStrategyMatchCase] = "MatchCase", [kCCStrategyIgnoreCase] = "IgnoreCase", @@ -725,8 +725,7 @@ viml_pexpr_repr_token_end: return ret; } -#ifdef UNIT_TESTING -static const char *const east_node_type_tab[] = { +const char *const east_node_type_tab[] = { [kExprNodeMissing] = "Missing", [kExprNodeOpMissing] = "OpMissing", [kExprNodeTernary] = "Ternary", @@ -766,7 +765,6 @@ static const char *const east_node_type_tab[] = { [kExprNodeOption] = "Option", [kExprNodeEnvironment] = "Environment", }; -#endif /// Represent `int` character as a string /// @@ -2148,10 +2146,10 @@ 
viml_pexpr_parse_invalid_comma: } #define EXP_VAL_COLON "E15: Expected value, got colon: %.*s" case kExprLexColon: { + bool is_ternary = false; if (kv_size(ast_stack) < 2) { goto viml_pexpr_parse_invalid_colon; } - bool is_ternary = false; bool can_be_ternary = true; bool is_subscript = false; for (size_t i = 1; i < kv_size(ast_stack); i++) { diff --git a/src/nvim/viml/parser/expressions.h b/src/nvim/viml/parser/expressions.h index 668c2a4c84..648f8cbc1f 100644 --- a/src/nvim/viml/parser/expressions.h +++ b/src/nvim/viml/parser/expressions.h @@ -341,6 +341,15 @@ typedef struct { /// Array mapping ExprASTNodeType to maximum amount of children node may have extern const uint8_t node_maxchildren[]; +/// Array mapping ExprASTNodeType values to their stringified versions +extern const char *const east_node_type_tab[]; + +/// Array mapping ExprComparisonType values to their stringified versions +extern const char *const eltkn_cmp_type_tab[]; + +/// Array mapping ExprCaseCompareStrategy values to their stringified versions +extern const char *const ccs_tab[]; + #ifdef INCLUDE_GENERATED_DECLARATIONS # include "viml/parser/expressions.h.generated.h" #endif -- cgit From 7bc6de75263f58c6c4f999bc86a6454ae9f28b80 Mon Sep 17 00:00:00 2001 From: ZyX Date: Sun, 5 Nov 2017 02:41:44 +0300 Subject: api/vim,functests: Add tests for nvim_parse_expression, fix found bugs --- src/nvim/viml/parser/expressions.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/nvim/viml/parser') diff --git a/src/nvim/viml/parser/expressions.c b/src/nvim/viml/parser/expressions.c index b19aab22af..b10952a8ac 100644 --- a/src/nvim/viml/parser/expressions.c +++ b/src/nvim/viml/parser/expressions.c @@ -1492,7 +1492,7 @@ static void parse_quoted_string(ParserState *const pstate, node->data.str.value = NULL; } else { char *v_p; - v_p = node->data.str.value = xmalloc(size); + v_p = node->data.str.value = xmallocz(size); p = s + 1; while (p < e) { const char *const chunk_e = memchr(p, '\'', 
(size_t)(e - p)); -- cgit From 05f775b5f248d922c9539432235738cc53e7edd7 Mon Sep 17 00:00:00 2001 From: ZyX Date: Mon, 6 Nov 2017 01:57:22 +0300 Subject: viml/parser/expressions: Briefly document some differences --- src/nvim/viml/parser/expressions.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'src/nvim/viml/parser') diff --git a/src/nvim/viml/parser/expressions.c b/src/nvim/viml/parser/expressions.c index b10952a8ac..998edb1ed4 100644 --- a/src/nvim/viml/parser/expressions.c +++ b/src/nvim/viml/parser/expressions.c @@ -3,6 +3,44 @@ /// VimL expression parser +// Planned incompatibilities (to be included into vim_diff.txt when this parser +// will be an actual part of VimL evaluation process): +// +// 1. Expressions are first fully parsed and only then executed. This means +// that while ":echo [system('touch abc')" will create file "abc" in Vim and +// only then raise syntax error regarding missing comma in list in Neovim +// trying to execute that will immediately raise syntax error regarding +// missing list end without actually executing anything. +// 2. Expressions are first fully parsed, without considering any runtime +// information. This means things like that "d.a" does not change its +// meaning depending on type of "d" (or whether Vim is currently executing or +// skipping). For compatibility reasons the dot thus may either be “concat +// or subscript” operator or just “concat” operator. +// 3. Expressions parser is aware whether it is called for :echo or <C-r>=. +// This means that while "<C-r>=1 | 2<CR>" is equivalent to "<C-r>=1<CR>" +// because "| 2" part is left to be treated as a command separator and then +// ignored in Neovim it is an error. +// 4. Expressions parser has generally better error reporting. But for +// compatibility reasons most errors have error code E15 while error messages +// are significantly different from Vim’s E15.
Also some error codes were +// retired because of being harder to emulate or because of them being +// a result of differences in parsing process: e.g. with ":echo {a, b}" Vim +// will attempt to parse expression as lambda, fail, check whether it is +// a curly-braces-name, fail again, and evaluate that as a dictionary, giving +// error regarding undefined variable "a" (or about missing colon). Neovim +// will not try to evaluate anything here: comma right after an argument name +// means that expression may not be anything, but lambda, so the resulting +// error message will never be about missing variable or colon: it will be +// about missing arrow (or a continuation of argument list). +// 5. Failing to parse expression always gives exactly one error message: no +// more stack of error messages like > +// +// :echo [1, +// E697: Missing end of List ']': +// E15: Invalid expression: [1, +// +// < , just exactly one E697 message. + #include #include #include -- cgit From 4aebd00a9eeeb2f56ff53dd4e383825e997ee7be Mon Sep 17 00:00:00 2001 From: ZyX Date: Mon, 6 Nov 2017 20:28:37 +0300 Subject: *: Fix linter errors --- src/nvim/viml/parser/expressions.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/nvim/viml/parser') diff --git a/src/nvim/viml/parser/expressions.c b/src/nvim/viml/parser/expressions.c index 998edb1ed4..71eda2cdc1 100644 --- a/src/nvim/viml/parser/expressions.c +++ b/src/nvim/viml/parser/expressions.c @@ -22,7 +22,7 @@ // ignored in Neovim it is an error. // 4. Expressions parser has generally better error reporting. But for // compatibility reasons most errors have error code E15 while error messages -// are significantly different from Vim’s E15. Also some error codes were +// are significantly different from Vim’s E15. Also some error codes were // retired because of being harder to emulate or because of them being // a result of differences in parsing process: e.g. 
with ":echo {a, b}" Vim // will attempt to parse expression as lambda, fail, check whether it is -- cgit From 1aa6276c29d562a6287519e6755a613eabca5c31 Mon Sep 17 00:00:00 2001 From: ZyX Date: Sun, 12 Nov 2017 00:03:45 +0300 Subject: viml/parser/expressions: Replace lambda-specific WantedNode entries This way code will be easier to adapt to handling (partially) non-expressions like :let lvalue part or :function definitions, and that would be needed in the future both for proper completion support and for the Ex commands parser. --- src/nvim/viml/parser/expressions.c | 101 +++++++++++++++++++------------------ 1 file changed, 52 insertions(+), 49 deletions(-) (limited to 'src/nvim/viml/parser') diff --git a/src/nvim/viml/parser/expressions.c b/src/nvim/viml/parser/expressions.c index 71eda2cdc1..e23c58bfd1 100644 --- a/src/nvim/viml/parser/expressions.c +++ b/src/nvim/viml/parser/expressions.c @@ -73,12 +73,21 @@ typedef enum { /// For unrestricted expressions as well, implies that top item in AST stack /// points to NULL. kENodeValue, - /// Argument: only allows simple argument names. - kENodeArgument, - /// Argument separator: only allows commas. - kENodeArgumentSeparator, } ExprASTWantedNode; +/// Parse type: what is being parsed currently +typedef enum { + /// Parsing regular VimL expression + kEPTExpr = 0, + /// Parsing lambda arguments + /// + /// Just like parsing function arguments, but it is valid to be ended with an + /// arrow only. + kEPTLambdaArguments, +} ExprASTParseType; + +typedef kvec_withinit_t(ExprASTParseType, 4) ExprASTParseTypeStack; + /// Operator priority level typedef enum { kEOpLvlInvalid = 0, @@ -1238,9 +1247,7 @@ static bool viml_pexpr_handle_bop(const ParserState *const pstate, ret = false; } } - *want_node_p = (*want_node_p == kENodeArgumentSeparator - ? 
kENodeArgument - : kENodeValue); + *want_node_p = kENodeValue; return ret; } @@ -1790,8 +1797,6 @@ static void parse_quoted_string(ParserState *const pstate, static const int want_node_to_lexer_flags[] = { [kENodeValue] = kELFlagIsNotCmp, [kENodeOperator] = kELFlagForbidScope, - [kENodeArgument] = kELFlagIsNotCmp, - [kENodeArgumentSeparator] = kELFlagForbidScope, }; /// Number of characters to highlight as NumberPrefix depending on the base @@ -1827,12 +1832,13 @@ ExprAST viml_pexpr_parse(ParserState *const pstate, const int flags) // (list start) last stack item contains NULL. Otherwise last stack item is // supposed to contain last “finished” value: e.g. "1" or "+(1, 1)" (node // representing "1+1"). - // - // Both kENodeValue and kENodeArgument stand for “value” nodes. ExprASTStack ast_stack; kvi_init(ast_stack); kvi_push(ast_stack, &ast.root); ExprASTWantedNode want_node = kENodeValue; + ExprASTParseTypeStack pt_stack; + kvi_init(pt_stack); + kvi_push(pt_stack, kEPTExpr); LexExprToken prev_token = { .type = kExprLexMissing }; bool highlighted_prev_spacing = false; // Lambda node, valid when parsing lambda arguments only. @@ -1884,8 +1890,7 @@ viml_pexpr_parse_process_token: assert(kv_size(ast_stack) >= 1); ExprASTNode *cur_node = NULL; #ifndef NDEBUG - const bool want_value = ( - want_node == kENodeValue || want_node == kENodeArgument); + const bool want_value = (want_node == kENodeValue); assert(want_value == (*top_node_p == NULL)); assert(kv_A(ast_stack, 0) == &ast.root); // Check that stack item i + 1 points to stack items’ i *last* child. @@ -1933,14 +1938,15 @@ viml_pexpr_parse_process_token: // circumstances, and in any case runtime and not parse time errors. 
(*kv_Z(ast_stack, 1))->type = kExprNodeConcat; } - if ((want_node == kENodeArgumentSeparator - && tok_type != kExprLexComma - && tok_type != kExprLexArrow) - || (want_node == kENodeArgument - && !(cur_token.type == kExprLexPlainIdentifier - && cur_token.data.var.scope == kExprVarScopeMissing - && !cur_token.data.var.autoload) - && tok_type != kExprLexArrow)) { + if (kv_last(pt_stack) == kEPTLambdaArguments + && ((want_node == kENodeOperator + && tok_type != kExprLexComma + && tok_type != kExprLexArrow) + || (want_node == kENodeValue + && !(cur_token.type == kExprLexPlainIdentifier + && cur_token.data.var.scope == kExprVarScopeMissing + && !cur_token.data.var.autoload) + && tok_type != kExprLexArrow))) { lambda_node->data.fig.type_guesses.allow_lambda = false; if (lambda_node->children != NULL && lambda_node->children->type == kExprNodeComma) { @@ -1956,16 +1962,11 @@ viml_pexpr_parse_process_token: // Else it may appear that possibly-lambda node is actually a dictionary // or curly-braces-name identifier. lambda_node = NULL; - if (want_node == kENodeArgumentSeparator) { - want_node = kENodeOperator; - } else { - want_node = kENodeValue; - } + kv_drop(pt_stack, 1); } } - assert(lambda_node == NULL - || want_node == kENodeArgumentSeparator - || want_node == kENodeArgument); + const ExprASTParseType cur_pt = kv_last(pt_stack); + assert(lambda_node == NULL || cur_pt == kEPTLambdaArguments); switch (tok_type) { case kExprLexMissing: case kExprLexSpacing: @@ -2129,7 +2130,7 @@ viml_pexpr_parse_process_token: break; } case kExprLexComma: { - assert(want_node != kENodeArgument); + assert(!(want_node == kENodeValue && cur_pt == kEPTLambdaArguments)); if (want_node == kENodeValue) { // Value level: comma appearing here is not valid. // Note: in Vim string(,x) will give E116, this is not the case here. 
@@ -2138,13 +2139,11 @@ viml_pexpr_parse_process_token: NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeMissing); cur_node->len = 0; *top_node_p = cur_node; - want_node = (want_node == kENodeArgument - ? kENodeArgumentSeparator - : kENodeOperator); + want_node = kENodeOperator; } - if (want_node == kENodeArgumentSeparator) { - assert(lambda_node->data.fig.type_guesses.allow_lambda); + if (cur_pt == kEPTLambdaArguments) { assert(lambda_node != NULL); + assert(lambda_node->data.fig.type_guesses.allow_lambda); SELECT_FIGURE_BRACE_TYPE(lambda_node, Lambda, Lambda); } if (kv_size(ast_stack) < 2) { @@ -2156,7 +2155,8 @@ viml_pexpr_parse_process_token: const ExprASTNodeType eastnode_type = (*eastnode_p)->type; const ExprOpLvl eastnode_lvl = node_lvl(**eastnode_p); if (eastnode_type == kExprNodeLambda) { - assert(want_node == kENodeArgumentSeparator); + assert(cur_pt == kEPTLambdaArguments + && want_node == kENodeOperator); break; } else if (eastnode_type == kExprNodeDictLiteral || eastnode_type == kExprNodeListLiteral @@ -2410,9 +2410,6 @@ viml_pexpr_parse_bracket_closing_error: case kExprNodeUnknownFigure: { if (new_top_node->children == NULL) { // No children of curly braces node indicates empty dictionary. - - // Should actually be kENodeArgument, but that was changed - // earlier. 
assert(want_node == kENodeValue); assert(new_top_node->data.fig.type_guesses.allow_dict); SELECT_FIGURE_BRACE_TYPE(new_top_node, DictLiteral, Dict); @@ -2475,7 +2472,7 @@ viml_pexpr_parse_figure_brace_closing_error: } *top_node_p = cur_node; kvi_push(ast_stack, &cur_node->children); - want_node = kENodeArgument; + kvi_push(pt_stack, kEPTLambdaArguments); lambda_node = cur_node; } else { ADD_IDENT( @@ -2495,9 +2492,12 @@ viml_pexpr_parse_figure_brace_closing_error: break; } case kExprLexArrow: { - if (want_node == kENodeArgumentSeparator - || want_node == kENodeArgument) { - if (want_node == kENodeArgument) { + if (cur_pt == kEPTLambdaArguments) { + kv_drop(pt_stack, 1); + assert(kv_size(pt_stack)); + if (want_node == kENodeValue) { + // Wanting value means trailing comma and NULL at the top of the + // stack. kv_drop(ast_stack, 1); } assert(kv_size(ast_stack) >= 1); @@ -2509,7 +2509,7 @@ viml_pexpr_parse_figure_brace_closing_error: SELECT_FIGURE_BRACE_TYPE(lambda_node, Lambda, Lambda); NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeArrow); if (lambda_node->children == NULL) { - assert(want_node == kENodeArgument); + assert(want_node == kENodeValue); lambda_node->children = cur_node; kvi_push(ast_stack, &lambda_node->children); } else { @@ -2535,10 +2535,8 @@ viml_pexpr_parse_figure_brace_closing_error: const ExprVarScope scope = (cur_token.type == kExprLexInvalid ? kExprVarScopeMissing : cur_token.data.var.scope); - if (want_node == kENodeValue || want_node == kENodeArgument) { - want_node = (want_node == kENodeArgument - ? kENodeArgumentSeparator - : kENodeOperator); + if (want_node == kENodeValue) { + want_node = kENodeOperator; NEW_NODE_WITH_CUR_POS(cur_node, (node_is_key ? 
kExprNodePlainKey @@ -2755,7 +2753,12 @@ viml_pexpr_parse_cycle_end: viml_parser_advance(pstate, cur_token.len); } while (true); viml_pexpr_parse_end: - if (want_node == kENodeValue) { + assert(kv_size(pt_stack)); + assert(kv_size(ast_stack)); + if (want_node == kENodeValue + // Blacklist some parse type entries as their presence means better error + // message in the other branch. + && kv_last(pt_stack) != kEPTLambdaArguments) { east_set_error(pstate, &ast.err, _("E15: Expected value, got EOC: %.*s"), pstate->pos); } else if (kv_size(ast_stack) != 1) { -- cgit From c7495ebcc0918ffd682083408895451318e41d1f Mon Sep 17 00:00:00 2001 From: ZyX Date: Sun, 12 Nov 2017 02:18:43 +0300 Subject: viml/parser/expressions: Add support for parsing assignments --- src/nvim/viml/parser/expressions.c | 239 +++++++++++++++++++++++++++++++------ src/nvim/viml/parser/expressions.h | 40 ++++++- 2 files changed, 241 insertions(+), 38 deletions(-) (limited to 'src/nvim/viml/parser') diff --git a/src/nvim/viml/parser/expressions.c b/src/nvim/viml/parser/expressions.c index e23c58bfd1..13f7131744 100644 --- a/src/nvim/viml/parser/expressions.c +++ b/src/nvim/viml/parser/expressions.c @@ -84,6 +84,10 @@ typedef enum { /// Just like parsing function arguments, but it is valid to be ended with an /// arrow only. kEPTLambdaArguments, + /// Assignment: parsing for :let + kEPTAssignment, + /// Single assignment: used when lists are not allowed (i.e. 
when nesting) + kEPTSingleAssignment, } ExprASTParseType; typedef kvec_withinit_t(ExprASTParseType, 4) ExprASTParseTypeStack; @@ -93,6 +97,7 @@ typedef enum { kEOpLvlInvalid = 0, kEOpLvlComplexIdentifier, kEOpLvlParens, + kEOpLvlAssignment, kEOpLvlArrow, kEOpLvlComma, kEOpLvlColon, @@ -217,8 +222,6 @@ LexExprToken viml_pexpr_next_token(ParserState *const pstate, const int flags) } CHAR(kExprLexQuestion, '?') CHAR(kExprLexColon, ':') - CHAR(kExprLexDot, '.') - CHAR(kExprLexPlus, '+') CHAR(kExprLexComma, ',') #undef CHAR @@ -532,12 +535,8 @@ LexExprToken viml_pexpr_next_token(ParserState *const pstate, const int flags) case '!': case '=': { if (pline.size == 1) { -viml_pexpr_next_token_invalid_comparison: - ret.type = (schar == '!' ? kExprLexNot : kExprLexInvalid); - if (ret.type == kExprLexInvalid) { - ret.data.err.msg = _("E15: Expected == or =~: %.*s"); - ret.data.err.type = kExprLexComparison; - } + ret.type = (schar == '!' ? kExprLexNot : kExprLexAssignment); + ret.data.ass.type = kExprAsgnPlain; break; } ret.type = kExprLexComparison; @@ -548,8 +547,11 @@ viml_pexpr_next_token_invalid_comparison: } else if (pline.data[1] == '~') { ret.data.cmp.type = kExprCmpMatches; ret.len++; + } else if (schar == '!') { + ret.type = kExprLexNot; } else { - goto viml_pexpr_next_token_invalid_comparison; + ret.type = kExprLexAssignment; + ret.data.ass.type = kExprAsgnPlain; } GET_CCS(ret, pline); break; @@ -571,17 +573,37 @@ viml_pexpr_next_token_invalid_comparison: break; } - // Minus sign or arrow from lambdas. + // Minus sign, arrow from lambdas or augmented assignment. case '-': { if (pline.size > 1 && pline.data[1] == '>') { ret.len++; ret.type = kExprLexArrow; + } else if (pline.size > 1 && pline.data[1] == '=') { + ret.len++; + ret.type = kExprLexAssignment; + ret.data.ass.type = kExprAsgnSubtract; } else { ret.type = kExprLexMinus; } break; } + // Sign or augmented assignment. 
+#define CHAR_OR_ASSIGN(ch, ch_type, ass_type) \ + case ch: { \ + if (pline.size > 1 && pline.data[1] == '=') { \ + ret.len++; \ + ret.type = kExprLexAssignment; \ + ret.data.ass.type = ass_type; \ + } else { \ + ret.type = ch_type; \ + } \ + break; \ + } + CHAR_OR_ASSIGN('+', kExprLexPlus, kExprAsgnAdd) + CHAR_OR_ASSIGN('.', kExprLexDot, kExprAsgnConcat) +#undef CHAR_OR_ASSIGN + // Expression end because Ex command ended. case NUL: case NL: { @@ -661,6 +683,7 @@ static const char *const eltkn_type_tab[] = { [kExprLexParenthesis] = "Parenthesis", [kExprLexComma] = "Comma", [kExprLexArrow] = "Arrow", + [kExprLexAssignment] = "Assignment", }; const char *const eltkn_cmp_type_tab[] = { @@ -671,6 +694,13 @@ const char *const eltkn_cmp_type_tab[] = { [kExprCmpIdentical] = "Identical", }; +const char *const expr_asgn_type_tab[] = { + [kExprAsgnPlain] = "Plain", + [kExprAsgnAdd] = "Add", + [kExprAsgnSubtract] = "Subtract", + [kExprAsgnConcat] = "Concat", +}; + const char *const ccs_tab[] = { [kCCStrategyUseOption] = "UseOption", [kCCStrategyMatchCase] = "MatchCase", @@ -732,6 +762,8 @@ const char *viml_pexpr_repr_token(const ParserState *const pstate, (int)token.data.cmp.inv) TKNARGS(kExprLexMultiplication, "(type=%s)", eltkn_mul_type_tab[token.data.mul.type]) + TKNARGS(kExprLexAssignment, "(type=%s)", + expr_asgn_type_tab[token.data.ass.type]) TKNARGS(kExprLexRegister, "(name=%s)", intchar2str(token.data.reg.name)) case kExprLexDoubleQuotedString: TKNARGS(kExprLexSingleQuotedString, "(closed=%i)", @@ -811,6 +843,7 @@ const char *const east_node_type_tab[] = { [kExprNodeMod] = "Mod", [kExprNodeOption] = "Option", [kExprNodeEnvironment] = "Environment", + [kExprNodeAssignment] = "Assignment", }; /// Represent `int` character as a string @@ -933,6 +966,7 @@ const uint8_t node_maxchildren[] = { [kExprNodeMod] = 2, [kExprNodeOption] = 0, [kExprNodeEnvironment] = 0, + [kExprNodeAssignment] = 2, }; /// Free memory occupied by AST @@ -993,6 +1027,7 @@ void 
viml_pexpr_free_ast(ExprAST ast) case kExprNodeLambda: case kExprNodeDictLiteral: case kExprNodeCurlyBracesIdentifier: + case kExprNodeAssignment: case kExprNodeComma: case kExprNodeColon: case kExprNodeArrow: @@ -1111,6 +1146,8 @@ static struct { [kExprNodeCurlyBracesIdentifier] = { kEOpLvlComplexIdentifier, kEOpAssLeft }, + [kExprNodeAssignment] = { kEOpLvlAssignment, kEOpAssLeft }, + [kExprNodeComplexIdentifier] = { kEOpLvlValue, kEOpAssLeft }, [kExprNodePlainIdentifier] = { kEOpLvlValue, kEOpAssNo }, @@ -1478,6 +1515,17 @@ static inline void east_set_error(const ParserState *const pstate, } \ } while (0) +/// Determine whether given parse type is an assignment +/// +/// @param[in] pt Checked parse type. +/// +/// @return true if parsing an assignment, false otherwise. +static inline bool pt_is_assignment(const ExprASTParseType pt) + FUNC_ATTR_ALWAYS_INLINE FUNC_ATTR_CONST FUNC_ATTR_WARN_UNUSED_RESULT +{ + return (pt == kEPTAssignment || pt == kEPTSingleAssignment); +} + /// Structure used to define “string shifts” necessary to map string /// highlighting to actual strings. typedef struct { @@ -1839,6 +1887,9 @@ ExprAST viml_pexpr_parse(ParserState *const pstate, const int flags) ExprASTParseTypeStack pt_stack; kvi_init(pt_stack); kvi_push(pt_stack, kEPTExpr); + if (flags & kExprFlagsParseLet) { + kvi_push(pt_stack, kEPTAssignment); + } LexExprToken prev_token = { .type = kExprLexMissing }; bool highlighted_prev_spacing = false; // Lambda node, valid when parsing lambda arguments only. @@ -1938,33 +1989,83 @@ viml_pexpr_parse_process_token: // circumstances, and in any case runtime and not parse time errors. (*kv_Z(ast_stack, 1))->type = kExprNodeConcat; } - if (kv_last(pt_stack) == kEPTLambdaArguments - && ((want_node == kENodeOperator + // Pop some stack pt_stack items in case of misplaced nodes. 
+ const bool is_single_assignment = kv_last(pt_stack) == kEPTSingleAssignment; + switch (kv_last(pt_stack)) { + case kEPTExpr: { + break; + } + case kEPTLambdaArguments: { + if ((want_node == kENodeOperator && tok_type != kExprLexComma && tok_type != kExprLexArrow) || (want_node == kENodeValue && !(cur_token.type == kExprLexPlainIdentifier && cur_token.data.var.scope == kExprVarScopeMissing && !cur_token.data.var.autoload) - && tok_type != kExprLexArrow))) { - lambda_node->data.fig.type_guesses.allow_lambda = false; - if (lambda_node->children != NULL - && lambda_node->children->type == kExprNodeComma) { - // If lambda has comma child this means that parser has already seen at - // least "{arg1,", so node cannot possibly be anything, but lambda. - - // Vim may give E121 or E720 in this case, but it does not look right to - // have either because both are results of reevaluation possibly-lambda - // node as a dictionary and here this is not going to happen. - ERROR_FROM_TOKEN_AND_MSG( - cur_token, _("E15: Expected lambda arguments list or arrow: %.*s")); - } else { - // Else it may appear that possibly-lambda node is actually a dictionary - // or curly-braces-name identifier. - lambda_node = NULL; - kv_drop(pt_stack, 1); + && tok_type != kExprLexArrow)) { + lambda_node->data.fig.type_guesses.allow_lambda = false; + if (lambda_node->children != NULL + && lambda_node->children->type == kExprNodeComma) { + // If lambda has comma child this means that parser has already seen at + // least "{arg1,", so node cannot possibly be anything, but lambda. + + // Vim may give E121 or E720 in this case, but it does not look right to + // have either because both are results of reevaluation possibly-lambda + // node as a dictionary and here this is not going to happen. 
+ ERROR_FROM_TOKEN_AND_MSG( + cur_token, _("E15: Expected lambda arguments list or arrow: %.*s")); + } else { + // Else it may appear that possibly-lambda node is actually a dictionary + // or curly-braces-name identifier. + lambda_node = NULL; + kv_drop(pt_stack, 1); + } + } + break; + } + case kEPTSingleAssignment: { + if (tok_type == kExprLexBracket && !cur_token.data.brc.closing) { + ERROR_FROM_TOKEN_AND_MSG( + cur_token, + _("E475: Nested lists not allowed when assigning: %.*s")); + kv_drop(pt_stack, 2); + assert(kv_size(pt_stack)); + assert(kv_last(pt_stack) == kEPTExpr); + break; + } + FALLTHROUGH; + } + case kEPTAssignment: { + if (want_node == kENodeValue + && tok_type != kExprLexBracket + && tok_type != kExprLexPlainIdentifier + && (tok_type != kExprLexFigureBrace || cur_token.data.brc.closing) + && !(node_is_key && tok_type == kExprLexNumber) + && tok_type != kExprLexEnv + && tok_type != kExprLexOption + && tok_type != kExprLexRegister) { + ERROR_FROM_TOKEN_AND_MSG( + cur_token, + _("E15: Expected value part of assignment lvalue: %.*s")); + kv_drop(pt_stack, 1); + } else if (want_node == kENodeOperator + && tok_type != kExprLexBracket + && (tok_type != kExprLexFigureBrace + || cur_token.data.brc.closing) + && tok_type != kExprLexDot + && (tok_type != kExprLexComma || !is_single_assignment) + && tok_type != kExprLexAssignment) { + ERROR_FROM_TOKEN_AND_MSG( + cur_token, + _("E15: Expected assignment operator or subscript: %.*s")); + kv_drop(pt_stack, 1); + } + assert(kv_size(pt_stack)); + break; } } + assert(kv_size(pt_stack)); const ExprASTParseType cur_pt = kv_last(pt_stack); assert(lambda_node == NULL || cur_pt == kEPTLambdaArguments); switch (tok_type) { @@ -2339,21 +2440,41 @@ viml_pexpr_parse_bracket_closing_error: } kvi_push(ast_stack, new_top_node_p); want_node = kENodeOperator; + if (cur_pt == kEPTSingleAssignment) { + kv_drop(pt_stack, 1); + } else if (cur_pt == kEPTAssignment) { + assert(ast.err.msg); + } else if (cur_pt == kEPTExpr + && 
kv_size(pt_stack) > 1 + && pt_is_assignment(kv_Z(pt_stack, 1))) { + kv_drop(pt_stack, 1); + } } else { if (want_node == kENodeValue) { - // Value means list literal. + // Value means list literal or list assignment. HL_CUR_TOKEN(List); NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeListLiteral); *top_node_p = cur_node; kvi_push(ast_stack, &cur_node->children); want_node = kENodeValue; + if (cur_pt == kEPTAssignment) { + // Additional assignment parse type allows to easily forbid nested + // lists. + kvi_push(pt_stack, kEPTSingleAssignment); + } } else { + // Operator means subscript, also in assignment. But in assignment + // subscript may be pretty much any expression, so need to push + // kEPTExpr. if (prev_token.type == kExprLexSpacing) { OP_MISSING; } NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeSubscript); ADD_OP_NODE(cur_node); HL_CUR_TOKEN(SubscriptBracket); + if (pt_is_assignment(cur_pt)) { + kvi_push(pt_stack, kEPTExpr); + } } } break; @@ -2458,15 +2579,31 @@ viml_pexpr_parse_figure_brace_closing_error: } kvi_push(ast_stack, new_top_node_p); want_node = kENodeOperator; + if (cur_pt == kEPTExpr + && kv_size(pt_stack) > 1 + && pt_is_assignment(kv_Z(pt_stack, 1))) { + kv_drop(pt_stack, 1); + } } else { if (want_node == kENodeValue) { HL_CUR_TOKEN(FigureBrace); // Value: may be any of lambda, dictionary literal and curly braces // name. - NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeUnknownFigure); - cur_node->data.fig.type_guesses.allow_lambda = true; - cur_node->data.fig.type_guesses.allow_dict = true; - cur_node->data.fig.type_guesses.allow_ident = true; + + // Though if we are in an assignment this may only be a curly braces + // name. 
+ if (pt_is_assignment(cur_pt)) { + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeCurlyBracesIdentifier); + cur_node->data.fig.type_guesses.allow_lambda = false; + cur_node->data.fig.type_guesses.allow_dict = false; + cur_node->data.fig.type_guesses.allow_ident = true; + kvi_push(pt_stack, kEPTExpr); + } else { + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeUnknownFigure); + cur_node->data.fig.type_guesses.allow_lambda = true; + cur_node->data.fig.type_guesses.allow_dict = true; + cur_node->data.fig.type_guesses.allow_ident = true; + } if (pstate->colors) { cur_node->data.fig.opening_hl_idx = kv_size(*pstate->colors) - 1; } @@ -2484,6 +2621,9 @@ viml_pexpr_parse_figure_brace_closing_error: cur_node->data.fig.type_guesses.allow_dict = false; cur_node->data.fig.type_guesses.allow_ident = true; kvi_push(ast_stack, &cur_node->children); + if (pt_is_assignment(cur_pt)) { + kvi_push(pt_stack, kEPTExpr); + } want_node = kENodeValue; } while (0), Curly); @@ -2746,6 +2886,36 @@ viml_pexpr_parse_no_paren_closing_error: {} want_node = kENodeOperator; break; } + case kExprLexAssignment: { + if (cur_pt == kEPTAssignment) { + kv_drop(pt_stack, 1); + } else if (cur_pt == kEPTSingleAssignment) { + kv_drop(pt_stack, 2); + ERROR_FROM_TOKEN_AND_MSG( + cur_token, + _("E475: Expected closing bracket to end list assignment " + "lvalue: %.*s")); + } else { + ERROR_FROM_TOKEN_AND_MSG( + cur_token, _("E15: Misplaced assignment: %.*s")); + } + assert(kv_size(pt_stack)); + assert(kv_last(pt_stack) == kEPTExpr); + ADD_VALUE_IF_MISSING(_("E15: Unexpected assignment: %.*s")); + NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeAssignment); + cur_node->data.ass.type = cur_token.data.ass.type; + switch (cur_token.data.ass.type) { +#define HL_ASGN(asgn, hl) \ + case kExprAsgn##asgn: { HL_CUR_TOKEN(hl); break; } + HL_ASGN(Plain, PlainAssignment) + HL_ASGN(Add, AssignmentWithAddition) + HL_ASGN(Subtract, AssignmentWithSubtraction) + HL_ASGN(Concat, AssignmentWithConcatenation) +#undef HL_ASGN + } + 
ADD_OP_NODE(cur_node); + break; + } } viml_pexpr_parse_cycle_end: prev_token = cur_token; @@ -2862,6 +3032,7 @@ viml_pexpr_parse_end: // FIXME: Investigate whether above are OK to be present in the stack. break; } + case kExprNodeAssignment: case kExprNodeMod: case kExprNodeDivision: case kExprNodeMultiplication: diff --git a/src/nvim/viml/parser/expressions.h b/src/nvim/viml/parser/expressions.h index 648f8cbc1f..23e172da75 100644 --- a/src/nvim/viml/parser/expressions.h +++ b/src/nvim/viml/parser/expressions.h @@ -51,6 +51,9 @@ typedef enum { kExprLexParenthesis, ///< Parenthesis, either opening or closing. kExprLexComma, ///< Comma. kExprLexArrow, ///< Arrow, like from lambda expressions. + kExprLexAssignment, ///< Assignment: `=` or `{op}=`. + // XXX When modifying this enum you need to also modify eltkn_type_tab in + // expressions.c and tests and, possibly, viml_pexpr_repr_token. } LexExprTokenType; typedef enum { @@ -68,6 +71,14 @@ typedef enum { kExprOptScopeLocal = 'l', } ExprOptScope; +/// All possible assignment types: `=` and `{op}=`. +typedef enum { + kExprAsgnPlain = 0, ///< Plain assignment: `=`. + kExprAsgnAdd, ///< Assignment augmented with addition: `+=`. + kExprAsgnSubtract, ///< Assignment augmented with subtraction: `-=`. + kExprAsgnConcat, ///< Assignment augmented with concatenation: `.=`. +} ExprAssignmentType; + #define EXPR_OPT_SCOPE_LIST \ ((char[]){ kExprOptScopeGlobal, kExprOptScopeLocal }) @@ -147,6 +158,10 @@ typedef struct { uint8_t base; ///< Base: 2, 8, 10 or 16. bool is_float; ///< True if number is a floating-point. } num; ///< For kExprLexNumber + + struct { + ExprAssignmentType type; + } ass; ///< For kExprLexAssignment } data; ///< Additional data, if needed. } LexExprToken; @@ -170,8 +185,8 @@ typedef enum { /// “EOC” is something like "|". It is fine with emitting EOC at the end of /// string still, with or without this flag set. 
kELFlagForbidEOC = (1 << 4), - // WARNING: whenever you add a new flag, alter klee_assume() statement in - // viml_expressions_lexer.c. + // XXX Whenever you add a new flag, alter klee_assume() statement in + // viml_expressions_lexer.c. } LexExprFlags; /// Expression AST node type @@ -233,6 +248,10 @@ typedef enum { kExprNodeMod, kExprNodeOption, kExprNodeEnvironment, + kExprNodeAssignment, + // XXX When modifying this list also modify east_node_type_tab both in parser + // and in tests, and you most likely will also have to alter list of + // highlight groups stored in highlight_init_cmdline variable. } ExprASTNodeType; typedef struct expr_ast_node ExprASTNode; @@ -301,6 +320,9 @@ struct expr_ast_node { const char *ident; ///< Environment variable name start. size_t ident_len; ///< Environment variable name length. } env; ///< For kExprNodeEnvironment. + struct { + ExprAssignmentType type; + } ass; ///< For kExprNodeAssignment } data; }; @@ -314,8 +336,15 @@ enum { /// When parsing expressions input by user bar is assumed to be a binary /// operator and other two are spacings. kExprFlagsDisallowEOC = (1 << 1), - // WARNING: whenever you add a new flag, alter klee_assume() statement in - // viml_expressions_parser.c. + /// Parse :let argument + /// + /// That mean that top level node must be an assignment and first nodes + /// belong to lvalues. + kExprFlagsParseLet = (1 << 2), + // XXX whenever you add a new flag, alter klee_assume() statement in + // viml_expressions_parser.c, nvim_parse_expression() flags parsing + // alongside with its documentation and flag sets in check_parsing() + // function in expressions parser functional and unit tests. 
} ExprParserFlags; /// AST error definition @@ -350,6 +379,9 @@ extern const char *const eltkn_cmp_type_tab[]; /// Array mapping ExprCaseCompareStrategy values to their stringified versions extern const char *const ccs_tab[]; +/// Array mapping ExprAssignmentType values to their stringified versions +extern const char *const expr_asgn_type_tab[]; + #ifdef INCLUDE_GENERATED_DECLARATIONS # include "viml/parser/expressions.h.generated.h" #endif -- cgit From 45445e2e03f1cbfa25dde76ccf3e24d0d297cabe Mon Sep 17 00:00:00 2001 From: ZyX Date: Sun, 12 Nov 2017 03:52:26 +0300 Subject: unittests: Add some more edge test cases --- src/nvim/viml/parser/expressions.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'src/nvim/viml/parser') diff --git a/src/nvim/viml/parser/expressions.c b/src/nvim/viml/parser/expressions.c index 13f7131744..07bac89997 100644 --- a/src/nvim/viml/parser/expressions.c +++ b/src/nvim/viml/parser/expressions.c @@ -40,6 +40,13 @@ // E15: Invalid expression: [1, // // < , just exactly one E697 message. +// 6. Some expressions involving calling parenthesis which are treated +// separately by Vim even when not separated by spaces are treated as one +// expression by Neovim: e.g. ":echo (1)(1)" will yield runtime error after +// failing to call "1", while Vim will echo "1 1". Reasoning is the same: +// type of what is in the first expression is generally not known when +// parsing, so to have separate expressions like this separate them with +// spaces. 
#include #include -- cgit From 342239a9c53cf4857d18c0583d4cab1fdca534fa Mon Sep 17 00:00:00 2001 From: ZyX Date: Mon, 13 Nov 2017 01:10:39 +0300 Subject: unittests,viml/parser/expressions: Start adding asgn parsing tests --- src/nvim/viml/parser/expressions.c | 50 ++++++++++++++++++++++++++++---------- 1 file changed, 37 insertions(+), 13 deletions(-) (limited to 'src/nvim/viml/parser') diff --git a/src/nvim/viml/parser/expressions.c b/src/nvim/viml/parser/expressions.c index 07bac89997..4bd3652292 100644 --- a/src/nvim/viml/parser/expressions.c +++ b/src/nvim/viml/parser/expressions.c @@ -1363,7 +1363,6 @@ static inline ParserPosition recol_pos(const ParserPosition pos, } \ } while (0) -// TODO(ZyX-I): actual condition /// Check whether it is possible to have next expression after current /// /// For :echo: `:echo @a @a` is a valid expression. `:echo (@a @a)` is not. @@ -1901,6 +1900,7 @@ ExprAST viml_pexpr_parse(ParserState *const pstate, const int flags) bool highlighted_prev_spacing = false; // Lambda node, valid when parsing lambda arguments only. 
ExprASTNode *lambda_node = NULL; + size_t asgn_level = 0; do { const bool is_concat_or_subscript = ( want_node == kENodeValue @@ -2063,6 +2063,9 @@ viml_pexpr_parse_process_token: && tok_type != kExprLexDot && (tok_type != kExprLexComma || !is_single_assignment) && tok_type != kExprLexAssignment) { + if (flags & kExprFlagsMulti && MAY_HAVE_NEXT_EXPR) { + goto viml_pexpr_parse_end; + } ERROR_FROM_TOKEN_AND_MSG( cur_token, _("E15: Expected assignment operator or subscript: %.*s")); @@ -2429,6 +2432,10 @@ viml_pexpr_parse_valid_colon: ExprASTNode *new_top_node = *new_top_node_p; switch (new_top_node->type) { case kExprNodeListLiteral: { + if (pt_is_assignment(cur_pt) && new_top_node->children == NULL) { + ERROR_FROM_TOKEN_AND_MSG( + cur_token, _("E475: Unable to assign to empty list: %.*s")); + } HL_CUR_TOKEN(List); break; } @@ -2447,14 +2454,18 @@ viml_pexpr_parse_bracket_closing_error: } kvi_push(ast_stack, new_top_node_p); want_node = kENodeOperator; - if (cur_pt == kEPTSingleAssignment) { - kv_drop(pt_stack, 1); - } else if (cur_pt == kEPTAssignment) { - assert(ast.err.msg); - } else if (cur_pt == kEPTExpr - && kv_size(pt_stack) > 1 - && pt_is_assignment(kv_Z(pt_stack, 1))) { - kv_drop(pt_stack, 1); + if (kv_size(ast_stack) <= asgn_level) { + assert(kv_size(ast_stack) == asgn_level); + asgn_level = 0; + if (cur_pt == kEPTSingleAssignment) { + kv_drop(pt_stack, 1); + } else if (cur_pt == kEPTAssignment) { + assert(ast.err.msg); + } else if (cur_pt == kEPTExpr + && kv_size(pt_stack) > 1 + && pt_is_assignment(kv_Z(pt_stack, 1))) { + kv_drop(pt_stack, 1); + } } } else { if (want_node == kENodeValue) { @@ -2480,6 +2491,7 @@ viml_pexpr_parse_bracket_closing_error: ADD_OP_NODE(cur_node); HL_CUR_TOKEN(SubscriptBracket); if (pt_is_assignment(cur_pt)) { + asgn_level = kv_size(ast_stack); kvi_push(pt_stack, kEPTExpr); } } @@ -2586,10 +2598,14 @@ viml_pexpr_parse_figure_brace_closing_error: } kvi_push(ast_stack, new_top_node_p); want_node = kENodeOperator; - if (cur_pt == 
kEPTExpr - && kv_size(pt_stack) > 1 - && pt_is_assignment(kv_Z(pt_stack, 1))) { - kv_drop(pt_stack, 1); + if (kv_size(ast_stack) <= asgn_level) { + assert(kv_size(ast_stack) == asgn_level); + if (cur_pt == kEPTExpr + && kv_size(pt_stack) > 1 + && pt_is_assignment(kv_Z(pt_stack, 1))) { + kv_drop(pt_stack, 1); + asgn_level = 0; + } } } else { if (want_node == kENodeValue) { @@ -2635,6 +2651,10 @@ viml_pexpr_parse_figure_brace_closing_error: } while (0), Curly); } + if (pt_is_assignment(cur_pt) + && !pt_is_assignment(kv_last(pt_stack))) { + asgn_level = kv_size(ast_stack); + } } break; } @@ -2755,6 +2775,10 @@ viml_pexpr_parse_figure_brace_closing_error: case kExprLexDot: { ADD_VALUE_IF_MISSING(_("E15: Unexpected dot: %.*s")); if (prev_token.type == kExprLexSpacing) { + if (cur_pt == kEPTAssignment) { + ERROR_FROM_TOKEN_AND_MSG( + cur_token, _("E15: Cannot concatenate in assignments: %.*s")); + } NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeConcat); HL_CUR_TOKEN(Concat); } else { -- cgit From c287893225bad586af486b37546f5982e5b1cd03 Mon Sep 17 00:00:00 2001 From: ZyX Date: Sun, 19 Nov 2017 19:22:54 +0300 Subject: viml/parser/expressions,unittests: Do better testing, fix found issues --- src/nvim/viml/parser/expressions.c | 40 ++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 19 deletions(-) (limited to 'src/nvim/viml/parser') diff --git a/src/nvim/viml/parser/expressions.c b/src/nvim/viml/parser/expressions.c index 4bd3652292..0824b3ca7d 100644 --- a/src/nvim/viml/parser/expressions.c +++ b/src/nvim/viml/parser/expressions.c @@ -2031,18 +2031,7 @@ viml_pexpr_parse_process_token: } break; } - case kEPTSingleAssignment: { - if (tok_type == kExprLexBracket && !cur_token.data.brc.closing) { - ERROR_FROM_TOKEN_AND_MSG( - cur_token, - _("E475: Nested lists not allowed when assigning: %.*s")); - kv_drop(pt_stack, 2); - assert(kv_size(pt_stack)); - assert(kv_last(pt_stack) == kEPTExpr); - break; - } - FALLTHROUGH; - } + case kEPTSingleAssignment: case 
kEPTAssignment: { if (want_node == kENodeValue && tok_type != kExprLexBracket @@ -2062,7 +2051,13 @@ viml_pexpr_parse_process_token: || cur_token.data.brc.closing) && tok_type != kExprLexDot && (tok_type != kExprLexComma || !is_single_assignment) - && tok_type != kExprLexAssignment) { + && tok_type != kExprLexAssignment + // Curly brace identifiers: will contain plain identifier or + // another curly brace in position where operator is wanted. + && !((tok_type == kExprLexPlainIdentifier + || (tok_type == kExprLexFigureBrace + && !cur_token.data.brc.closing)) + && prev_token.type != kExprLexSpacing)) { if (flags & kExprFlagsMulti && MAY_HAVE_NEXT_EXPR) { goto viml_pexpr_parse_end; } @@ -2457,9 +2452,7 @@ viml_pexpr_parse_bracket_closing_error: if (kv_size(ast_stack) <= asgn_level) { assert(kv_size(ast_stack) == asgn_level); asgn_level = 0; - if (cur_pt == kEPTSingleAssignment) { - kv_drop(pt_stack, 1); - } else if (cur_pt == kEPTAssignment) { + if (cur_pt == kEPTAssignment) { assert(ast.err.msg); } else if (cur_pt == kEPTExpr && kv_size(pt_stack) > 1 @@ -2467,10 +2460,12 @@ viml_pexpr_parse_bracket_closing_error: kv_drop(pt_stack, 1); } } + if (cur_pt == kEPTSingleAssignment && kv_size(ast_stack) == 1) { + kv_drop(pt_stack, 1); + } } else { if (want_node == kENodeValue) { // Value means list literal or list assignment. - HL_CUR_TOKEN(List); NEW_NODE_WITH_CUR_POS(cur_node, kExprNodeListLiteral); *top_node_p = cur_node; kvi_push(ast_stack, &cur_node->children); @@ -2479,7 +2474,12 @@ viml_pexpr_parse_bracket_closing_error: // Additional assignment parse type allows to easily forbid nested // lists. kvi_push(pt_stack, kEPTSingleAssignment); + } else if (cur_pt == kEPTSingleAssignment) { + ERROR_FROM_TOKEN_AND_MSG( + cur_token, + _("E475: Nested lists not allowed when assigning: %.*s")); } + HL_CUR_TOKEN(List); } else { // Operator means subscript, also in assignment. 
But in assignment // subscript may be pretty much any expression, so need to push @@ -2491,7 +2491,8 @@ viml_pexpr_parse_bracket_closing_error: ADD_OP_NODE(cur_node); HL_CUR_TOKEN(SubscriptBracket); if (pt_is_assignment(cur_pt)) { - asgn_level = kv_size(ast_stack); + assert(want_node == kENodeValue); // Subtract 1 for NULL at top. + asgn_level = kv_size(ast_stack) - 1; kvi_push(pt_stack, kEPTExpr); } } @@ -2653,7 +2654,8 @@ viml_pexpr_parse_figure_brace_closing_error: } if (pt_is_assignment(cur_pt) && !pt_is_assignment(kv_last(pt_stack))) { - asgn_level = kv_size(ast_stack); + assert(want_node == kENodeValue); // Subtract 1 for NULL at top. + asgn_level = kv_size(ast_stack) - 1; } } break; -- cgit From f20f97c936f1438589c8176f62ce69c26e255f85 Mon Sep 17 00:00:00 2001 From: ZyX Date: Sun, 19 Nov 2017 21:13:27 +0300 Subject: *: Fix linter errors --- src/nvim/viml/parser/expressions.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) (limited to 'src/nvim/viml/parser') diff --git a/src/nvim/viml/parser/expressions.c b/src/nvim/viml/parser/expressions.c index 0824b3ca7d..6c7c328b6d 100644 --- a/src/nvim/viml/parser/expressions.c +++ b/src/nvim/viml/parser/expressions.c @@ -2014,17 +2014,20 @@ viml_pexpr_parse_process_token: lambda_node->data.fig.type_guesses.allow_lambda = false; if (lambda_node->children != NULL && lambda_node->children->type == kExprNodeComma) { - // If lambda has comma child this means that parser has already seen at - // least "{arg1,", so node cannot possibly be anything, but lambda. - - // Vim may give E121 or E720 in this case, but it does not look right to - // have either because both are results of reevaluation possibly-lambda - // node as a dictionary and here this is not going to happen. + // If lambda has comma child this means that parser has already seen + // at least "{arg1,", so node cannot possibly be anything, but + // lambda. 
+ + // Vim may give E121 or E720 in this case, but it does not look + // right to have either because both are results of reevaluation + // possibly-lambda node as a dictionary and here this is not going + // to happen. ERROR_FROM_TOKEN_AND_MSG( - cur_token, _("E15: Expected lambda arguments list or arrow: %.*s")); + cur_token, + _("E15: Expected lambda arguments list or arrow: %.*s")); } else { - // Else it may appear that possibly-lambda node is actually a dictionary - // or curly-braces-name identifier. + // Else it may appear that possibly-lambda node is actually + // a dictionary or curly-braces-name identifier. lambda_node = NULL; kv_drop(pt_stack, 1); } -- cgit From 17077b68133a62d0dc1b84cb48779464c117e028 Mon Sep 17 00:00:00 2001 From: ZyX Date: Sun, 26 Nov 2017 16:08:53 +0300 Subject: viml/parser/expressions: Make $ENV not depend on &isident --- src/nvim/viml/parser/expressions.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) (limited to 'src/nvim/viml/parser') diff --git a/src/nvim/viml/parser/expressions.c b/src/nvim/viml/parser/expressions.c index 6c7c328b6d..63ad6bab35 100644 --- a/src/nvim/viml/parser/expressions.c +++ b/src/nvim/viml/parser/expressions.c @@ -47,6 +47,8 @@ // type of what is in the first expression is generally not known when // parsing, so to have separate expressions like this separate them with // spaces. +// 7. 'isident' no longer applies to environment variables, they always include +// ASCII alphanumeric characters and underscore and nothing except this. #include #include @@ -383,10 +385,14 @@ LexExprToken viml_pexpr_next_token(ParserState *const pstate, const int flags) break; } +#define ISWORD_OR_AUTOLOAD(x) \ + (ASCII_ISALNUM(x) || (x) == AUTOLOAD_CHAR || (x) == '_') +#define ISWORD(x) \ + (ASCII_ISALNUM(x) || (x) == '_') + // Environment variable. case '$': { - // FIXME: Parser function can’t be thread-safe with vim_isIDc. 
- CHARREG(kExprLexEnv, vim_isIDc); + CHARREG(kExprLexEnv, ISWORD); break; } @@ -400,10 +406,6 @@ LexExprToken viml_pexpr_next_token(ParserState *const pstate, const int flags) case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z': case '_': { -#define ISWORD_OR_AUTOLOAD(x) \ - (ASCII_ISALNUM(x) || (x) == AUTOLOAD_CHAR || (x) == '_') -#define ISWORD(x) \ - (ASCII_ISALNUM(x) || (x) == '_') ret.data.var.scope = 0; ret.data.var.autoload = false; CHARREG(kExprLexPlainIdentifier, ISWORD); @@ -441,9 +443,10 @@ LexExprToken viml_pexpr_next_token(ParserState *const pstate, const int flags) CHARREG(kExprLexPlainIdentifier, ISWORD_OR_AUTOLOAD); } break; -#undef ISWORD_OR_AUTOLOAD -#undef ISWORD } + +#undef ISWORD +#undef ISWORD_OR_AUTOLOAD #undef CHARREG // Option. -- cgit From 36a4f3a259ffa282129b18358cce4130397077c5 Mon Sep 17 00:00:00 2001 From: ZyX Date: Sun, 26 Nov 2017 16:57:42 +0300 Subject: viml/parser/expressions: Make sure that listed nodes may be present With the new test leaving `assert(false);` for any of the cases makes tests crash. --- src/nvim/viml/parser/expressions.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'src/nvim/viml/parser') diff --git a/src/nvim/viml/parser/expressions.c b/src/nvim/viml/parser/expressions.c index 63ad6bab35..9773e60bbd 100644 --- a/src/nvim/viml/parser/expressions.c +++ b/src/nvim/viml/parser/expressions.c @@ -3065,12 +3065,9 @@ viml_pexpr_parse_end: // to be caught later. break; } + case kExprNodeSubscript: case kExprNodeConcatOrSubscript: case kExprNodeComplexIdentifier: - case kExprNodeSubscript: { - // FIXME: Investigate whether above are OK to be present in the stack. 
- break; - } case kExprNodeAssignment: case kExprNodeMod: case kExprNodeDivision: -- cgit From de45ec0146486c49719ff6f6dcceb4914b471c7a Mon Sep 17 00:00:00 2001 From: ZyX Date: Thu, 30 Nov 2017 02:01:49 +0300 Subject: keymap: Do not use vim_isIDc in keymap.c Note: there are three changes to ascii_isident. Reverting first two (in find_special_key and first in get_special_key_code) normally fails the new test with empty &isident, but reverting the third does not. Hence adding `>` to &isident. Ref vim/vim#2389. --- src/nvim/viml/parser/expressions.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'src/nvim/viml/parser') diff --git a/src/nvim/viml/parser/expressions.c b/src/nvim/viml/parser/expressions.c index 9773e60bbd..cfcde6bb38 100644 --- a/src/nvim/viml/parser/expressions.c +++ b/src/nvim/viml/parser/expressions.c @@ -386,13 +386,11 @@ LexExprToken viml_pexpr_next_token(ParserState *const pstate, const int flags) } #define ISWORD_OR_AUTOLOAD(x) \ - (ASCII_ISALNUM(x) || (x) == AUTOLOAD_CHAR || (x) == '_') -#define ISWORD(x) \ - (ASCII_ISALNUM(x) || (x) == '_') + (ascii_isident(x) || (x) == AUTOLOAD_CHAR) // Environment variable. case '$': { - CHARREG(kExprLexEnv, ISWORD); + CHARREG(kExprLexEnv, ascii_isident); break; } @@ -408,7 +406,7 @@ LexExprToken viml_pexpr_next_token(ParserState *const pstate, const int flags) case '_': { ret.data.var.scope = 0; ret.data.var.autoload = false; - CHARREG(kExprLexPlainIdentifier, ISWORD); + CHARREG(kExprLexPlainIdentifier, ascii_isident); // "is" and "isnot" operators. 
if (!(flags & kELFlagIsNotCmp) && ((ret.len == 2 && memcmp(pline.data, "is", 2) == 0) @@ -445,7 +443,6 @@ LexExprToken viml_pexpr_next_token(ParserState *const pstate, const int flags) break; } -#undef ISWORD #undef ISWORD_OR_AUTOLOAD #undef CHARREG -- cgit From 5ab0f988caffad5e8c87a075cbd3f91f0f7e002c Mon Sep 17 00:00:00 2001 From: ZyX Date: Thu, 30 Nov 2017 11:53:25 +0300 Subject: *: Replace all occurrences of NVim with Nvim --- src/nvim/viml/parser/expressions.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/nvim/viml/parser') diff --git a/src/nvim/viml/parser/expressions.c b/src/nvim/viml/parser/expressions.c index cfcde6bb38..4196ecb9d2 100644 --- a/src/nvim/viml/parser/expressions.c +++ b/src/nvim/viml/parser/expressions.c @@ -1326,7 +1326,7 @@ static inline ParserPosition recol_pos(const ParserPosition pos, } /// Get highlight group name -#define HL(g) (is_invalid ? "NVimInvalid" #g : "NVim" #g) +#define HL(g) (is_invalid ? "NvimInvalid" #g : "Nvim" #g) /// Highlight current token with the given group #define HL_CUR_TOKEN(g) \ @@ -2570,7 +2570,7 @@ viml_pexpr_parse_bracket_closing_error: new_top_node, _("E15: Don't know what figure brace means: %.*s")); if (pstate->colors) { - // Will reset to NVimInvalidFigureBrace. + // Will reset to NvimInvalidFigureBrace. kv_A(*pstate->colors, new_top_node->data.fig.opening_hl_idx).group = ( HL(FigureBrace)); -- cgit From 8bd1bbcec817443b20870d5220063c363ce7edb8 Mon Sep 17 00:00:00 2001 From: James McCoy Date: Sun, 11 Mar 2018 17:23:27 -0400 Subject: Add missing PVS headers to new files --- src/nvim/viml/parser/parser.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/nvim/viml/parser') diff --git a/src/nvim/viml/parser/parser.c b/src/nvim/viml/parser/parser.c index 08d8846018..8d26d08ea7 100644 --- a/src/nvim/viml/parser/parser.c +++ b/src/nvim/viml/parser/parser.c @@ -1,3 +1,6 @@ +// This is an open source non-commercial project. Dear PVS-Studio, please check +// it. 
PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com + #include "nvim/viml/parser/parser.h" #ifdef INCLUDE_GENERATED_DECLARATIONS -- cgit