summaryrefslogtreecommitdiff
path: root/src/Language/Fiddle/Parser.hs
diff options
context:
space:
mode:
authorJosh Rahm <joshuarahm@gmail.com>2023-01-08 22:44:44 -0700
committerJosh Rahm <joshuarahm@gmail.com>2023-01-08 22:47:50 -0700
commitdef481d234ce5e1671d9faaa539477de8cd14640 (patch)
tree76bcd95f030571c506a73ddb021eeed7a6f6aec1 /src/Language/Fiddle/Parser.hs
parent0c45ef8884ec82d26c47e952132d54d4bb8a9238 (diff)
downloadfiddle-def481d234ce5e1671d9faaa539477de8cd14640.tar.gz
fiddle-def481d234ce5e1671d9faaa539477de8cd14640.tar.bz2
fiddle-def481d234ce5e1671d9faaa539477de8cd14640.zip
Parser is able to parse the goal file.
Diffstat (limited to 'src/Language/Fiddle/Parser.hs')
-rw-r--r--src/Language/Fiddle/Parser.hs248
1 files changed, 157 insertions, 91 deletions
diff --git a/src/Language/Fiddle/Parser.hs b/src/Language/Fiddle/Parser.hs
index 59faeda..94fbbf9 100644
--- a/src/Language/Fiddle/Parser.hs
+++ b/src/Language/Fiddle/Parser.hs
@@ -7,15 +7,16 @@ module Language.Fiddle.Parser
)
where
-import Debug.Trace
import Data.Functor.Identity
import Data.Text (Text)
import qualified Data.Text
+import Debug.Trace
import Language.Fiddle.Ast
import Language.Fiddle.Tokenizer
import Language.Fiddle.Types
import Text.Parsec hiding (token)
import qualified Text.Parsec
+import Text.Printf
type F = Either ParseError
@@ -23,49 +24,98 @@ type S = [Token SourceSpan]
type P = ParsecT S () Identity
+type A = Commented SourceSpan
+
type Pa (a :: Stage -> (* -> *) -> * -> *) = P (a 'Stage1 F (Commented SourceSpan))
+comment :: P Comment
+comment =
+ token $ \case
+ (TokComment c) -> Just (NormalComment c)
+ (TokDocComment c) -> Just (DocComment c)
+ _ -> Nothing
+
+isComment :: Token s -> Bool
+isComment (Token t _) =
+ case t of
+ (TokComment _) -> True
+ (TokDocComment _) -> True
+ _ -> False
+
+-- Removes trailing comments from a list of tokens. Comments that don't preceed
+-- an actual language token have minimal semantic value and are thus discarded.
+stripTrailingComments :: [Token s] -> [Token s]
+stripTrailingComments = reverse . dropWhile isComment . reverse
+
fiddleUnit :: Pa FiddleUnit
fiddleUnit = do
- withMeta $
- FiddleUnit <$> many1 (fiddleDecl <* tok TokSemi)
+ withMeta
+ ( FiddleUnit <$> many1 (fiddleDecl <* tok TokSemi)
+ )
+ <* many comment
fiddleDecl :: Pa FiddleDecl
-fiddleDecl =
- withMeta $
- choice
- [ tok KWOption >> OptionDecl <$> ident <*> ident,
- tok KWPackage >> PackageDecl
- <$> ident
- <*> defer body packageBody,
- tok KWLocation >> LocationDecl <$> ident <*> (tok TokEq >> expression),
- tok KWBits >> BitsDecl <$> ident <*> (tok TokColon >> bitType),
- tok KWObjtype
- >> ObjTypeDecl <$> ident <*> (tok TokColon >> defer body objTypeBody),
- tok KWObject
- >> ObjectDecl
+fiddleDecl = do
+ withMeta $ do
+ t <- tokenType <$> anyToken
+ case t of
+ KWOption -> OptionDecl <$> ident <*> ident
+ KWPackage -> do
+ p <-
+ PackageDecl
<$> ident
- <*> (tok KWAt *> expression)
- <*> (tok TokColon *> objType)
- ]
+ <*> defer body packageBody
+ printNext
+ return p
+ KWLocation -> LocationDecl <$> ident <*> (tok TokEq >> expression)
+ KWBits -> BitsDecl <$> ident <*> (tok TokColon >> bitType)
+ KWObjtype ->
+ ObjTypeDecl <$> ident <*> (tok TokColon >> defer body objTypeBody)
+ KWObject ->
+ ObjectDecl
+ <$> ident
+ <*> (tok KWAt *> expression)
+ <*> (tok TokColon *> objType)
+ _ ->
+ fail $
+ printf "Unexpected token %s. Expected top-level declaration." (show t)
+
+-- choice
+-- [ tok KWOption >> OptionDecl <$> ident <*> ident,
+-- tok KWPackage >> PackageDecl
+-- <$> ident
+-- <*> defer body packageBody,
+-- tok KWLocation >> LocationDecl <$> ident <*> (tok TokEq >> expression),
+-- tok KWBits >> BitsDecl <$> ident <*> (tok TokColon >> bitType),
+-- tok KWObjtype
+-- >> ObjTypeDecl <$> ident <*> (tok TokColon >> defer body objTypeBody),
+-- tok KWObject
+-- >> ObjectDecl
+-- <$> ident
+-- <*> (tok KWAt *> expression)
+-- <*> (tok TokColon *> objType)
+-- ]
objType :: Pa ObjType
-objType = recur objType'
+objType = do
+ base <- withMeta baseObj
+ recur' <- recur
+ return $ recur' base
where
- recur b = do
- recur
- ( try $
- withMeta $ do
- t <- b
- e <- tok TokLBrace *> expression <* tok TokRBrace
- return (ArrayObjType t e)
- )
- <|> b
+ recur :: P (ObjType Stage1 F A -> ObjType Stage1 F A)
+ recur =
+ ( do
+ withMeta $ do
+ expr <- tok TokLBracket *> expression <* tok TokRBracket
+ recur' <- recur
+ return (\met base -> recur' (ArrayObjType base expr met))
+ )
+ <|> return id
- objType' =
- withMeta $
- (ReferencedObjType <$> ident)
- <|> (AnonymousObjType <$> defer body objTypeBody)
+ baseObj :: P (A -> ObjType Stage1 F A)
+ baseObj =
+ (ReferencedObjType <$> ident)
+ <|> (AnonymousObjType <$> defer body objTypeBody)
exprInParen :: Pa Expression
exprInParen = tok TokLParen *> expression <* tok TokRParen
@@ -73,8 +123,7 @@ exprInParen = tok TokLParen *> expression <* tok TokRParen
objTypeBody :: Pa ObjTypeBody
objTypeBody =
withMeta $
- withinBody $
- ObjTypeBody <$> many (objTypeDecl <* tok TokSemi)
+ ObjTypeBody <$> many (objTypeDecl <* tok TokSemi)
objTypeDecl :: Pa ObjTypeDecl
objTypeDecl =
@@ -84,10 +133,10 @@ objTypeDecl =
AssertPosStatement <$> exprInParen
)
<|> ( do
+ mod <- optionMaybe modifier
tok KWReg
- RegisterDecl
- <$> optionMaybe modifier
- <*> optionMaybe ident
+ RegisterDecl mod
+ <$> optionMaybe ident
<*> exprInParen
<*> optionMaybe (tok TokColon *> registerBody)
)
@@ -108,8 +157,7 @@ registerBody = withMeta $ RegisterBody <$> defer body deferredRegisterBody
deferredRegisterBody :: Pa DeferredRegisterBody
deferredRegisterBody =
withMeta $
- withinBody $
- DeferredRegisterBody <$> many (registerBitsDecl <* tok TokSemi)
+ DeferredRegisterBody <$> many (registerBitsDecl <* tok TokSemi)
registerBitsDecl :: Pa RegisterBitsDecl
registerBitsDecl =
@@ -119,25 +167,29 @@ registerBitsDecl =
)
<|> ( DefinedBits <$> optionMaybe modifier
<*> ident
- <*> registerBitsTypeRef
+ <*> (tok TokColon >> registerBitsTypeRef)
)
registerBitsTypeRef :: Pa RegisterBitsTypeRef
-registerBitsTypeRef = recur typeRef
+registerBitsTypeRef = do
+ base <- baseTypeRef
+ recur' <- recur
+ return (recur' base)
where
- recur b = do
- recur
- ( try $
- withMeta $ do
- t <- b
- e <- tok TokLBrace *> expression <* tok TokRBrace
- return (RegisterBitsArray t e)
- )
- <|> b
+ recur :: P (RegisterBitsTypeRef Stage1 F A -> RegisterBitsTypeRef Stage1 F A)
+ recur =
+ ( do
+ withMeta $ do
+ expr <- tok TokLBracket *> expression <* tok TokRBracket
+ recur' <- recur
+ return (\met base -> recur' (RegisterBitsArray base expr met))
+ )
+ <|> return id
- typeRef =
+ baseTypeRef =
withMeta $
- (tok KWEnum >> RegisterBitsAnonymousType <$> anonymousBitsType)
+ (RegisterBitsJustBits <$> exprInParen)
+ <|> (RegisterBitsAnonymousType <$> anonymousBitsType)
<|> (RegisterBitsReference <$> ident)
anonymousBitsType :: Pa AnonymousBitsType
@@ -157,13 +209,12 @@ bitType = withMeta $ rawBits <|> enumType
enumBody :: Pa EnumBody
enumBody =
withMeta $
- withinBody $
- EnumBody <$> many (enumConstantDecl <* tok TokComma)
+ EnumBody <$> many (enumConstantDecl <* tok TokComma)
enumConstantDecl :: Pa EnumConstantDecl
enumConstantDecl =
withMeta $
- (tok KWReserved >> EnumConstantReserved <$> exprInParen)
+ (tok KWReserved >> EnumConstantReserved <$> (tok TokEq >> expression))
<|> (EnumConstantDecl <$> ident <*> (tok TokEq >> expression))
expression :: Pa Expression
@@ -176,19 +227,23 @@ expression = withMeta $
body :: P [Token SourceSpan]
body = do
- t0 <- tok TokLBrace
+ (_, b, _) <- body'
+ return b
+
+body' :: P (Token SourceSpan, [Token SourceSpan], Token SourceSpan)
+body' = do
+ l <- tokKeepComment TokLBrace
ret <-
concat
<$> manyTill
- ( body <|> fmap (: []) anyToken
+ ( ((\(b0, b1, b2) -> [b0] ++ b1 ++ [b2]) <$> body') <|> fmap (: []) anyToken
)
- (lookAhead $ tok TokRBrace)
- t1 <- tok TokRBrace
- return $ t0 : ret ++ [t1]
+ (lookAhead $ tokKeepComment TokRBrace)
+ r <- tokKeepComment TokRBrace
+
+ next <- lookAhead anyToken
--- Parses something within braces.
-withinBody :: P a -> P a
-withinBody p = tok TokLBrace *> p <* tok TokRBrace
+ return (l, stripTrailingComments ret, r)
-- A deferred parsing takes a part of a text file (such as a body) and returns a
-- deferred computation for parsing that section.
@@ -198,27 +253,32 @@ withinBody p = tok TokLBrace *> p <* tok TokRBrace
-- can parse the stuff inside the body as it's own, separate parsing.
defer :: P [Token SourceSpan] -> P b -> P (F b)
defer p0 pb = do
- contents <- p0
-
sourcePos <- getPosition
- traceM $ "Contents: " ++ show contents
-
- return $
- Text.Parsec.runParser
- ( do
- setPosition sourcePos
- pb <* eof
- )
- ()
- (sourceName sourcePos)
- contents
+ Text.Parsec.runParser
+ ( do
+ setPosition sourcePos
+ pb <* eof
+ )
+ ()
+ (sourceName sourcePos)
+ <$> p0
packageBody :: Pa PackageBody
packageBody =
withMeta $
- withinBody $
- PackageBody <$> many (fiddleDecl <* tok TokSemi)
+ PackageBody
+ <$> many
+ ( fiddleDecl
+ <* ( tok TokSemi <|> fail "Expected ';'"
+ )
+ )
+
+printNext :: P ()
+printNext = do
+ t <- lookAhead anyToken
+ traceM $ "NextToken: " ++ show t
+ return ()
ident :: Pa Identifier
ident =
@@ -230,19 +290,12 @@ ident =
-- Takes a some parsable thing p and automatically parses the comments before
-- and after and sets the positions and adds it to the annotation.
withMeta :: P (Commented SourceSpan -> b) -> P b
-withMeta p = try $ do
+withMeta p = do
comments <- many comment
start <- getPosition
fn <- p
end <- getPosition
return $ fn (Commented comments (SourceSpan start end))
- where
- comment :: P Comment
- comment =
- token $ \case
- (TokComment c) -> Just (NormalComment c)
- (TokDocComment c) -> Just (DocComment c)
- _ -> Nothing
token :: (T -> Maybe a) -> ParsecT S u Identity a
token fn =
@@ -251,14 +304,27 @@ token fn =
(\(Token _ (SourceSpan s1 _)) -> s1)
(\(Token t _) -> fn t)
+tokKeepComment :: T -> P (Token SourceSpan)
+tokKeepComment t' = do
+ Text.Parsec.token
+ (\(Token t _) -> show t)
+ (\(Token _ (SourceSpan s1 _)) -> s1)
+ (\tok@(Token t _) -> if t == t' then Just tok else Nothing)
+
tok :: T -> P (Token SourceSpan)
-tok t' =
+tok t' = do
+ many comment
Text.Parsec.token
(\(Token t _) -> show t)
(\(Token _ (SourceSpan s1 _)) -> s1)
(\tok@(Token t _) -> if t == t' then Just tok else Nothing)
parseFiddleText :: String -> Text -> F (FiddleUnit 'Stage1 F (Commented SourceSpan))
-parseFiddleText sourceName txt = runIdentity .
- Text.Parsec.runParserT
- (fiddleUnit <* eof) () sourceName =<< tokenize sourceName txt
+parseFiddleText sourceName txt =
+ runIdentity
+ . Text.Parsec.runParserT
+ (fiddleUnit <* eof)
+ ()
+ sourceName
+ . stripTrailingComments
+ =<< tokenize sourceName txt