viml/parser/expressions: Add a way to adjust lexer

It also adds support for kExprLexOr, which for some reason was forgotten. The only thing verified is that the KLEE test compiles in non-KLEE mode; it was not checked that anything actually works or that KLEE is able to run the tests.
author: ZyX <kp-pav@yandex.ru> 2017-09-28 00:40:25 +0300
committer: ZyX <kp-pav@yandex.ru> 2017-10-08 22:25:08 +0300
commit: 9fa8f7fc0a24371f7956450d840bdae8a2fc9a51 (patch)
tree: c561ac7b67b68a811873834fafa4fa5f423fd384 /src
parent: 0987d3b10f36202e9f0289b50298e69aaf2fa4d2 (diff)
download: rneovim-9fa8f7fc0a24371f7956450d840bdae8a2fc9a51.tar.gz
rneovim-9fa8f7fc0a24371f7956450d840bdae8a2fc9a51.tar.bz2
rneovim-9fa8f7fc0a24371f7956450d840bdae8a2fc9a51.zip
2 files changed, 100 insertions, 33 deletions
diff --git a/src/nvim/viml/parser/expressions.c b/src/nvim/viml/parser/expressions.c
index cabf2dac58..3027c0046b 100644
--- a/src/nvim/viml/parser/expressions.c
+++ b/src/nvim/viml/parser/expressions.c
@@ -47,10 +47,10 @@ typedef enum {
 /// Get next token for the VimL expression input
 ///
 /// @param  pstate  Parser state.
-/// @param[in]  peek  If true, do not advance pstate cursor.
+/// @param[in]  flags  Flags, @see LexExprFlags.
 ///
 /// @return Next token.
-LexExprToken viml_pexpr_next_token(ParserState *const pstate, const bool peek)
+LexExprToken viml_pexpr_next_token(ParserState *const pstate, const int flags)
   FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL
 {
   LexExprToken ret = {
@@ -153,12 +153,33 @@ LexExprToken viml_pexpr_next_token(ParserState *const pstate, const bool peek)
     }
 
     // Number.
-    // Note: determining whether dot is (not) a part of a float needs more
-    // context, so lexer does not do this.
-    // FIXME: Resolve ambiguity by additional argument.
     case '0': case '1': case '2': case '3': case '4': case '5': case '6':
     case '7': case '8': case '9': {
+      ret.data.num.is_float = false;
       CHARREG(kExprLexNumber, ascii_isdigit);
+      if (flags & kELFlagAllowFloat) {
+        if (pline.size > ret.len + 1
+            && pline.data[ret.len] == '.'
+            && ascii_isdigit(pline.data[ret.len + 1])) {
+          ret.len++;
+          ret.data.num.is_float = true;
+          CHARREG(kExprLexNumber, ascii_isdigit);
+          if (pline.size > ret.len + 1
+              && (pline.data[ret.len] == 'e'
+                  || pline.data[ret.len] == 'E')
+              && ((pline.size > ret.len + 2
+                   && (pline.data[ret.len + 1] == '+'
+                       || pline.data[ret.len + 1] == '-')
+                   && ascii_isdigit(pline.data[ret.len + 2]))
+                  || ascii_isdigit(pline.data[ret.len + 1]))) {
+            ret.len++;
+            if (pline.data[ret.len] == '+' || pline.data[ret.len] == '-') {
+              ret.len++;
+            }
+            CHARREG(kExprLexNumber, ascii_isdigit);
+          }
+        }
+      }
       break;
     }
 
@@ -187,8 +208,9 @@ LexExprToken viml_pexpr_next_token(ParserState *const pstate, const bool peek)
       ret.data.var.autoload = false;
       CHARREG(kExprLexPlainIdentifier, ISWORD);
       // "is" and "isnot" operators.
-      if ((ret.len == 2 && memcmp(pline.data, "is", 2) == 0)
-          || (ret.len == 5 && memcmp(pline.data, "isnot", 5) == 0)) {
+      if (!(flags & kELFlagIsNotCmp)
+          && ((ret.len == 2 && memcmp(pline.data, "is", 2) == 0)
+              || (ret.len == 5 && memcmp(pline.data, "isnot", 5) == 0))) {
         ret.type = kExprLexComparison;
         ret.data.cmp.type = kExprLexCmpIdentical;
         ret.data.cmp.inv = (ret.len == 5);
@@ -197,14 +219,14 @@ LexExprToken viml_pexpr_next_token(ParserState *const pstate, const bool peek)
       } else if (ret.len == 1
                  && pline.size > 1
                  && strchr("sgvbwtla", schar) != NULL
-                 && pline.data[ret.len] == ':') {
+                 && pline.data[ret.len] == ':'
+                 && !(flags & kELFlagForbidScope)) {
         ret.len++;
         ret.data.var.scope = schar;
         CHARREG(kExprLexPlainIdentifier, ISWORD_OR_AUTOLOAD);
         ret.data.var.autoload = (
             memchr(pline.data + 2, AUTOLOAD_CHAR, ret.len - 2)
             != NULL);
-      // FIXME: Resolve ambiguity with an argument to the lexer function.
       // Previous CHARREG stopped at autoload character in order to make it
       // possible to detect `is#`. Continue now with autoload characters
       // included.
@@ -373,7 +395,30 @@ viml_pexpr_next_token_invalid_comparison:
     // Expression end because Ex command ended.
     case NUL:
     case NL: {
-      ret.type = kExprLexEOC;
+      if (flags & kELFlagForbidEOC) {
+        ret.type = kExprLexInvalid;
+        ret.data.err.msg = _("E15: Unexpected EOC character: %.*s");
+        ret.data.err.type = kExprLexSpacing;
+      } else {
+        ret.type = kExprLexEOC;
+      }
+      break;
+    }
+
+    case '|': {
+      if (pline.size >= 2 && pline.data[ret.len] == '|') {
+        // "||" is or.
+        ret.len++;
+        ret.type = kExprLexOr;
+      } else if (flags & kELFlagForbidEOC) {
+        // Note: `<C-r>=1 | 2<CR>` actually yields 1 in Vim without any
+        //       errors. This will be changed here.
+        ret.type = kExprLexInvalid;
+        ret.data.err.msg = _("E15: Unexpected EOC character: %.*s");
+        ret.data.err.type = kExprLexOr;
+      } else {
+        ret.type = kExprLexEOC;
+      }
       break;
     }
 
@@ -389,7 +434,7 @@ viml_pexpr_next_token_invalid_comparison:
   }
 #undef GET_CCS
 viml_pexpr_next_token_adv_return:
-  if (!peek) {
+  if (!(flags & kELFlagPeek)) {
     viml_parser_advance(pstate, ret.len);
   }
   return ret;
@@ -990,34 +1035,28 @@ ExprAST viml_pexpr_parse(ParserState *const pstate, const int flags)
   // Lambda node, valid when parsing lambda arguments only.
   ExprASTNode *lambda_node = NULL;
   do {
-    LexExprToken cur_token = viml_pexpr_next_token(pstate, true);
+    const int want_node_to_lexer_flags[] = {
+      [kENodeValue] = kELFlagIsNotCmp,
+      [kENodeOperator] = kELFlagForbidScope,
+      [kENodeArgument] = kELFlagIsNotCmp,
+      [kENodeArgumentSeparator] = kELFlagForbidScope,
+    };
+    // FIXME Determine when (not) to allow floating-point numbers.
+    const int lexer_additional_flags = (
+        kELFlagPeek
+        | ((flags & kExprFlagsDisallowEOC) ? kELFlagForbidEOC : 0));
+    LexExprToken cur_token = viml_pexpr_next_token(
+        pstate, want_node_to_lexer_flags[want_node] | lexer_additional_flags);
     if (cur_token.type == kExprLexEOC) {
-      if (flags & kExprFlagsDisallowEOC) {
-        if (cur_token.len == 0) {
-          // It is end of string, break.
-          break;
-        } else {
-          // It is NL, NUL or bar.
-          //
-          // Note: `<C-r>=1 | 2<CR>` actually yields 1 in Vim without any
-          //       errors. This will be changed here.
-          cur_token.type = kExprLexInvalid;
-          cur_token.data.err.msg = _("E15: Unexpected EOC character: %.*s");
-          const ParserLine pline = (
-              pstate->reader.lines.items[cur_token.start.line]);
-          const char eoc_char = pline.data[cur_token.start.col];
-          cur_token.data.err.type = ((eoc_char == NUL || eoc_char == NL)
-                                     ? kExprLexSpacing
-                                     : kExprLexOr);
-        }
-      } else {
-        break;
-      }
+      break;
     }
     LexExprTokenType tok_type = cur_token.type;
     const bool token_invalid = (tok_type == kExprLexInvalid);
     bool is_invalid = token_invalid;
 viml_pexpr_parse_process_token:
+    // May use different flags this time.
+    cur_token = viml_pexpr_next_token(
+        pstate, want_node_to_lexer_flags[want_node] | lexer_additional_flags);
     if (tok_type == kExprLexSpacing) {
       if (is_invalid) {
         HL_CUR_TOKEN(Spacing);
diff --git a/src/nvim/viml/parser/expressions.h b/src/nvim/viml/parser/expressions.h
index 13640ec137..64abab9e41 100644
--- a/src/nvim/viml/parser/expressions.h
+++ b/src/nvim/viml/parser/expressions.h
@@ -109,9 +109,37 @@ typedef struct {
       LexExprTokenType type;  ///< Suggested type for parsing incorrect code.
       const char *msg;  ///< Error message.
     } err;  ///< For kExprLexInvalid
+
+    struct {
+      bool is_float;  ///< True if number is a floating-point.
+    } num;  ///< For kExprLexNumber
   } data;  ///< Additional data, if needed.
 } LexExprToken;
 
+typedef enum {
+  /// If set, the cursor in pstate is not advanced past the returned token
+  kELFlagPeek = (1 << 0),
+  /// If set, a scope prefix (like `g:`) is not lexed as part of identifier
+  kELFlagForbidScope = (1 << 1),
+  /// If set, numbers are allowed to be lexed as floating-point ones
+  ///
+  /// I.e. a dot followed by a digit is the decimal point separator; if unset
+  /// the dot is not a part of a number at all.
+  kELFlagAllowFloat = (1 << 2),
+  /// If set, `is` and `isnot` are not lexed as comparison operators
+  ///
+  /// They are supposed to be just regular identifiers then.
+  kELFlagIsNotCmp = (1 << 3),
+  /// If set, EOC tokens are forbidden
+  ///
+  /// The lexer then yields Invalid token with E15 in place of EOC one if
+  /// “EOC” is something like "|". It is fine with emitting EOC at the end of
+  /// string still, with or without this flag set.
+  kELFlagForbidEOC = (1 << 4),
+  // WARNING: whenever you add a new flag, alter klee_assume() statement in
+  // viml_expressions_lexer.c.
+} LexExprFlags;
+
 /// Expression AST node type
 typedef enum {
   kExprNodeMissing = 'X',
author	ZyX <kp-pav@yandex.ru>	2017-09-28 00:40:25 +0300
committer	ZyX <kp-pav@yandex.ru>	2017-10-08 22:25:08 +0300
commit	9fa8f7fc0a24371f7956450d840bdae8a2fc9a51 (patch)
tree	c561ac7b67b68a811873834fafa4fa5f423fd384 /src
parent	0987d3b10f36202e9f0289b50298e69aaf2fa4d2 (diff)
download	rneovim-9fa8f7fc0a24371f7956450d840bdae8a2fc9a51.tar.gz rneovim-9fa8f7fc0a24371f7956450d840bdae8a2fc9a51.tar.bz2 rneovim-9fa8f7fc0a24371f7956450d840bdae8a2fc9a51.zip