diff options
author | Josh Rahm <joshuarahm@gmail.com> | 2022-12-17 22:38:48 -0700 |
---|---|---|
committer | Josh Rahm <joshuarahm@gmail.com> | 2022-12-17 22:39:05 -0700 |
commit | 01685ab88228fb602cb0e408d93560e76e1371a1 (patch) | |
tree | 861a5ce9fc874ff7440f6d855758fdb1d86d4ffe /src/Language/Fiddle/Tokenizer.hs | |
parent | 47c776413ed4e11839ad6838575d0077ddd496a3 (diff) | |
download | fiddle-01685ab88228fb602cb0e408d93560e76e1371a1.tar.gz fiddle-01685ab88228fb602cb0e408d93560e76e1371a1.tar.bz2 fiddle-01685ab88228fb602cb0e408d93560e76e1371a1.zip |
WIP: Basic parser implemented.
The parser is completely untested and probably broken, but it's probably
pretty close because it does typecheck. This is a Work-in-progress.
Diffstat (limited to 'src/Language/Fiddle/Tokenizer.hs')
-rw-r--r-- | src/Language/Fiddle/Tokenizer.hs | 31 |
1 files changed, 20 insertions, 11 deletions
diff --git a/src/Language/Fiddle/Tokenizer.hs b/src/Language/Fiddle/Tokenizer.hs index d3239fd..ec41042 100644 --- a/src/Language/Fiddle/Tokenizer.hs +++ b/src/Language/Fiddle/Tokenizer.hs @@ -1,29 +1,34 @@ {-# LANGUAGE DeriveFunctor #-} +{-# LANGUAGE OverloadedStrings #-} + module Language.Fiddle.Tokenizer where import Data.Char (isDigit) import Data.Text (Text) +import qualified Data.Text import Language.Fiddle.Types import Text.Parsec import qualified Text.Parsec data T = KWAssertPos - | Ident !String + | TokIdent !Text | KWAt - | KWBittype + | KWBits | KWEnum - | CommentTok !String - | DocCommentTok !String + | TokComment !Text + | TokDocComment !Text | KWLocation | KWObject | KWObjtype | KWOption | KWPackage | KWReg + | KWReserved | KWRo | KWWo - | LitNum !String + | KWRw + | TokLitNum !Text | TokColon | TokComma | TokEq @@ -48,10 +53,11 @@ parseToken = spaces *> tok parseToken' <* spaces Token t . SourceSpan p1 <$> getPosition + parseAlNumTok :: Text -> T parseAlNumTok str = case str of "at" -> KWAt - "bittype" -> KWBittype + "bits" -> KWBits "enum" -> KWEnum "location" -> KWLocation "object" -> KWObject @@ -61,19 +67,22 @@ parseToken = spaces *> tok parseToken' <* spaces "reg" -> KWReg "ro" -> KWRo "wo" -> KWWo - (h : _) | isDigit h -> LitNum str - ident -> Ident ident + "rw" -> KWRw + "reserved" -> KWReserved + "assert_pos" -> KWAssertPos + (Data.Text.head -> h) | isDigit h -> TokLitNum str + ident -> TokIdent ident parseComment = try ( do string "//" - CommentTok <$> manyTill anyChar (char '\n') + TokComment . Data.Text.pack <$> manyTill anyChar (char '\n') ) <|> try ( do string "/**" - DocCommentTok <$> manyTill anyChar (try $ string "*/") + TokDocComment . Data.Text.pack <$> manyTill anyChar (try $ string "*/") ) parseSymbol = @@ -93,7 +102,7 @@ parseToken = spaces *> tok parseToken' <* spaces a $> b = a >> return b parseToken' = - fmap parseAlNumTok (many1 (alphaNum <|> char '_')) + fmap (parseAlNumTok . 
Data.Text.pack) (many1 (alphaNum <|> char '_')) <|> parseComment <|> parseSymbol |