From 01685ab88228fb602cb0e408d93560e76e1371a1 Mon Sep 17 00:00:00 2001 From: Josh Rahm Date: Sat, 17 Dec 2022 22:38:48 -0700 Subject: WIP: Basic parser implemented. The parser is completely untested and probably broken, but it's probably pretty close because it does typecheck. This is a Work-in-progress. --- src/Language/Fiddle/Tokenizer.hs | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) (limited to 'src/Language/Fiddle/Tokenizer.hs') diff --git a/src/Language/Fiddle/Tokenizer.hs b/src/Language/Fiddle/Tokenizer.hs index d3239fd..ec41042 100644 --- a/src/Language/Fiddle/Tokenizer.hs +++ b/src/Language/Fiddle/Tokenizer.hs @@ -1,29 +1,34 @@ {-# LANGUAGE DeriveFunctor #-} +{-# LANGUAGE OverloadedStrings #-} + module Language.Fiddle.Tokenizer where import Data.Char (isDigit) import Data.Text (Text) +import qualified Data.Text import Language.Fiddle.Types import Text.Parsec import qualified Text.Parsec data T = KWAssertPos - | Ident !String + | TokIdent !Text | KWAt - | KWBittype + | KWBits | KWEnum - | CommentTok !String - | DocCommentTok !String + | TokComment !Text + | TokDocComment !Text | KWLocation | KWObject | KWObjtype | KWOption | KWPackage | KWReg + | KWReserved | KWRo | KWWo - | LitNum !String + | KWRw + | TokLitNum !Text | TokColon | TokComma | TokEq @@ -48,10 +53,11 @@ parseToken = spaces *> tok parseToken' <* spaces Token t .
SourceSpan p1 <$> getPosition + parseAlNumTok :: Text -> T parseAlNumTok str = case str of "at" -> KWAt - "bittype" -> KWBittype + "bits" -> KWBits "enum" -> KWEnum "location" -> KWLocation "object" -> KWObject @@ -61,19 +67,22 @@ parseToken = spaces *> tok parseToken' <* spaces "reg" -> KWReg "ro" -> KWRo "wo" -> KWWo - (h : _) | isDigit h -> LitNum str - ident -> Ident ident + "rw" -> KWRw + "reserved" -> KWReserved + "assert_pos" -> KWAssertPos + (Data.Text.head -> h) | isDigit h -> TokLitNum str + ident -> TokIdent ident parseComment = try ( do string "//" - CommentTok <$> manyTill anyChar (char '\n') + TokComment . Data.Text.pack <$> manyTill anyChar (char '\n') ) <|> try ( do string "/**" - DocCommentTok <$> manyTill anyChar (try $ string "*/") + TokDocComment . Data.Text.pack <$> manyTill anyChar (try $ string "*/") ) parseSymbol = @@ -93,7 +102,7 @@ parseToken = spaces *> tok parseToken' <* spaces a $> b = a >> return b parseToken' = - fmap parseAlNumTok (many1 (alphaNum <|> char '_')) + fmap (parseAlNumTok . Data.Text.pack) (many1 (alphaNum <|> char '_')) <|> parseComment <|> parseSymbol -- cgit