diff options
author | Josh Rahm <joshuarahm@gmail.com> | 2023-01-08 22:44:44 -0700 |
---|---|---|
committer | Josh Rahm <joshuarahm@gmail.com> | 2023-01-08 22:47:50 -0700 |
commit | def481d234ce5e1671d9faaa539477de8cd14640 (patch) | |
tree | 76bcd95f030571c506a73ddb021eeed7a6f6aec1 /src/Language/Fiddle/Parser.hs | |
parent | 0c45ef8884ec82d26c47e952132d54d4bb8a9238 (diff) | |
download | fiddle-def481d234ce5e1671d9faaa539477de8cd14640.tar.gz fiddle-def481d234ce5e1671d9faaa539477de8cd14640.tar.bz2 fiddle-def481d234ce5e1671d9faaa539477de8cd14640.zip |
Parser is able to parse the goal file.
Diffstat (limited to 'src/Language/Fiddle/Parser.hs')
-rw-r--r-- | src/Language/Fiddle/Parser.hs | 248 |
1 files changed, 157 insertions, 91 deletions
diff --git a/src/Language/Fiddle/Parser.hs b/src/Language/Fiddle/Parser.hs index 59faeda..94fbbf9 100644 --- a/src/Language/Fiddle/Parser.hs +++ b/src/Language/Fiddle/Parser.hs @@ -7,15 +7,16 @@ module Language.Fiddle.Parser ) where -import Debug.Trace import Data.Functor.Identity import Data.Text (Text) import qualified Data.Text +import Debug.Trace import Language.Fiddle.Ast import Language.Fiddle.Tokenizer import Language.Fiddle.Types import Text.Parsec hiding (token) import qualified Text.Parsec +import Text.Printf type F = Either ParseError @@ -23,49 +24,98 @@ type S = [Token SourceSpan] type P = ParsecT S () Identity +type A = Commented SourceSpan + type Pa (a :: Stage -> (* -> *) -> * -> *) = P (a 'Stage1 F (Commented SourceSpan)) +comment :: P Comment +comment = + token $ \case + (TokComment c) -> Just (NormalComment c) + (TokDocComment c) -> Just (DocComment c) + _ -> Nothing + +isComment :: Token s -> Bool +isComment (Token t _) = + case t of + (TokComment _) -> True + (TokDocComment _) -> True + _ -> False + +-- Removes trailing comments from a list of tokens. Comments that don't preceed +-- an actual language token have minimal semantic value and are thus discarded. +stripTrailingComments :: [Token s] -> [Token s] +stripTrailingComments = reverse . dropWhile isComment . reverse + fiddleUnit :: Pa FiddleUnit fiddleUnit = do - withMeta $ - FiddleUnit <$> many1 (fiddleDecl <* tok TokSemi) + withMeta + ( FiddleUnit <$> many1 (fiddleDecl <* tok TokSemi) + ) + <* many comment fiddleDecl :: Pa FiddleDecl -fiddleDecl = - withMeta $ - choice - [ tok KWOption >> OptionDecl <$> ident <*> ident, - tok KWPackage >> PackageDecl - <$> ident - <*> defer body packageBody, - tok KWLocation >> LocationDecl <$> ident <*> (tok TokEq >> expression), - tok KWBits >> BitsDecl <$> ident <*> (tok TokColon >> bitType), - tok KWObjtype - >> ObjTypeDecl <$> ident <*> (tok TokColon >> defer body objTypeBody), - tok KWObject - >> ObjectDecl +fiddleDecl = do + withMeta $ do + t <- tokenType <$> anyToken + case t of + KWOption -> OptionDecl <$> ident <*> ident + KWPackage -> do + p <- + PackageDecl <$> ident - <*> (tok KWAt *> expression) - <*> (tok TokColon *> objType) - ] + <*> defer body packageBody + printNext + return p + KWLocation -> LocationDecl <$> ident <*> (tok TokEq >> expression) + KWBits -> BitsDecl <$> ident <*> (tok TokColon >> bitType) + KWObjtype -> + ObjTypeDecl <$> ident <*> (tok TokColon >> defer body objTypeBody) + KWObject -> + ObjectDecl + <$> ident + <*> (tok KWAt *> expression) + <*> (tok TokColon *> objType) + _ -> + fail $ + printf "Unexpected token %s. Expected top-level declaration." (show t) + +-- choice +-- [ tok KWOption >> OptionDecl <$> ident <*> ident, +-- tok KWPackage >> PackageDecl +-- <$> ident +-- <*> defer body packageBody, +-- tok KWLocation >> LocationDecl <$> ident <*> (tok TokEq >> expression), +-- tok KWBits >> BitsDecl <$> ident <*> (tok TokColon >> bitType), +-- tok KWObjtype +-- >> ObjTypeDecl <$> ident <*> (tok TokColon >> defer body objTypeBody), +-- tok KWObject +-- >> ObjectDecl +-- <$> ident +-- <*> (tok KWAt *> expression) +-- <*> (tok TokColon *> objType) +-- ] objType :: Pa ObjType -objType = recur objType' +objType = do + base <- withMeta baseObj + recur' <- recur + return $ recur' base where - recur b = do - recur - ( try $ - withMeta $ do - t <- b - e <- tok TokLBrace *> expression <* tok TokRBrace - return (ArrayObjType t e) - ) - <|> b + recur :: P (ObjType Stage1 F A -> ObjType Stage1 F A) + recur = + ( do + withMeta $ do + expr <- tok TokLBracket *> expression <* tok TokRBracket + recur' <- recur + return (\met base -> recur' (ArrayObjType base expr met)) + ) + <|> return id - objType' = - withMeta $ - (ReferencedObjType <$> ident) - <|> (AnonymousObjType <$> defer body objTypeBody) + baseObj :: P (A -> ObjType Stage1 F A) + baseObj = + (ReferencedObjType <$> ident) + <|> (AnonymousObjType <$> defer body objTypeBody) exprInParen :: Pa Expression exprInParen = tok TokLParen *> expression <* tok TokRParen @@ -73,8 +123,7 @@ exprInParen = tok TokLParen *> expression <* tok TokRParen objTypeBody :: Pa ObjTypeBody objTypeBody = withMeta $ - withinBody $ - ObjTypeBody <$> many (objTypeDecl <* tok TokSemi) + ObjTypeBody <$> many (objTypeDecl <* tok TokSemi) objTypeDecl :: Pa ObjTypeDecl objTypeDecl = @@ -84,10 +133,10 @@ objTypeDecl = AssertPosStatement <$> exprInParen ) <|> ( do + mod <- optionMaybe modifier tok KWReg - RegisterDecl - <$> optionMaybe modifier - <*> optionMaybe ident + RegisterDecl mod + <$> optionMaybe ident <*> exprInParen <*> optionMaybe (tok TokColon *> registerBody) ) @@ -108,8 +157,7 @@ registerBody = withMeta $ RegisterBody <$> defer body deferredRegisterBody deferredRegisterBody :: Pa DeferredRegisterBody deferredRegisterBody = withMeta $ - withinBody $ - DeferredRegisterBody <$> many (registerBitsDecl <* tok TokSemi) + DeferredRegisterBody <$> many (registerBitsDecl <* tok TokSemi) registerBitsDecl :: Pa RegisterBitsDecl registerBitsDecl = @@ -119,25 +167,29 @@ registerBitsDecl = ) <|> ( DefinedBits <$> optionMaybe modifier <*> ident - <*> registerBitsTypeRef + <*> (tok TokColon >> registerBitsTypeRef) ) registerBitsTypeRef :: Pa RegisterBitsTypeRef -registerBitsTypeRef = recur typeRef +registerBitsTypeRef = do + base <- baseTypeRef + recur' <- recur + return (recur' base) where - recur b = do - recur - ( try $ - withMeta $ do - t <- b - e <- tok TokLBrace *> expression <* tok TokRBrace - return (RegisterBitsArray t e) - ) - <|> b + recur :: P (RegisterBitsTypeRef Stage1 F A -> RegisterBitsTypeRef Stage1 F A) + recur = + ( do + withMeta $ do + expr <- tok TokLBracket *> expression <* tok TokRBracket + recur' <- recur + return (\met base -> recur' (RegisterBitsArray base expr met)) + ) + <|> return id - typeRef = + baseTypeRef = withMeta $ - (tok KWEnum >> RegisterBitsAnonymousType <$> anonymousBitsType) + (RegisterBitsJustBits <$> exprInParen) + <|> (RegisterBitsAnonymousType <$> anonymousBitsType) <|> (RegisterBitsReference <$> ident) anonymousBitsType :: Pa AnonymousBitsType @@ -157,13 +209,12 @@ bitType = withMeta $ rawBits <|> enumType enumBody :: Pa EnumBody enumBody = withMeta $ - withinBody $ - EnumBody <$> many (enumConstantDecl <* tok TokComma) + EnumBody <$> many (enumConstantDecl <* tok TokComma) enumConstantDecl :: Pa EnumConstantDecl enumConstantDecl = withMeta $ - (tok KWReserved >> EnumConstantReserved <$> exprInParen) + (tok KWReserved >> EnumConstantReserved <$> (tok TokEq >> expression)) <|> (EnumConstantDecl <$> ident <*> (tok TokEq >> expression)) expression :: Pa Expression @@ -176,19 +227,23 @@ expression = withMeta $ body :: P [Token SourceSpan] body = do - t0 <- tok TokLBrace + (_, b, _) <- body' + return b + +body' :: P (Token SourceSpan, [Token SourceSpan], Token SourceSpan) +body' = do + l <- tokKeepComment TokLBrace ret <- concat <$> manyTill - ( body <|> fmap (: []) anyToken + ( ((\(b0, b1, b2) -> [b0] ++ b1 ++ [b2]) <$> body') <|> fmap (: []) anyToken ) - (lookAhead $ tok TokRBrace) - t1 <- tok TokRBrace - return $ t0 : ret ++ [t1] + (lookAhead $ tokKeepComment TokRBrace) + r <- tokKeepComment TokRBrace + + next <- lookAhead anyToken --- Parses something within braces. -withinBody :: P a -> P a -withinBody p = tok TokLBrace *> p <* tok TokRBrace + return (l, stripTrailingComments ret, r) -- A deferred parsing takes a part of a text file (such as a body) and returns a -- deferred computation for parsing that section. @@ -198,27 +253,32 @@ withinBody p = tok TokLBrace *> p <* tok TokRBrace -- can parse the stuff inside the body as it's own, separate parsing. defer :: P [Token SourceSpan] -> P b -> P (F b) defer p0 pb = do - contents <- p0 - sourcePos <- getPosition - traceM $ "Contents: " ++ show contents - - return $ - Text.Parsec.runParser - ( do - setPosition sourcePos - pb <* eof - ) - () - (sourceName sourcePos) - contents + Text.Parsec.runParser + ( do + setPosition sourcePos + pb <* eof + ) + () + (sourceName sourcePos) + <$> p0 packageBody :: Pa PackageBody packageBody = withMeta $ - withinBody $ - PackageBody <$> many (fiddleDecl <* tok TokSemi) + PackageBody + <$> many + ( fiddleDecl + <* ( tok TokSemi <|> fail "Expected ';'" + ) + ) + +printNext :: P () +printNext = do + t <- lookAhead anyToken + traceM $ "NextToken: " ++ show t + return () ident :: Pa Identifier ident = @@ -230,19 +290,12 @@ ident = -- Takes a some parsable thing p and automatically parses the comments before -- and after and sets the positions and adds it to the annotation. withMeta :: P (Commented SourceSpan -> b) -> P b -withMeta p = try $ do +withMeta p = do comments <- many comment start <- getPosition fn <- p end <- getPosition return $ fn (Commented comments (SourceSpan start end)) - where - comment :: P Comment - comment = - token $ \case - (TokComment c) -> Just (NormalComment c) - (TokDocComment c) -> Just (DocComment c) - _ -> Nothing token :: (T -> Maybe a) -> ParsecT S u Identity a token fn = @@ -251,14 +304,27 @@ token fn = (\(Token _ (SourceSpan s1 _)) -> s1) (\(Token t _) -> fn t) +tokKeepComment :: T -> P (Token SourceSpan) +tokKeepComment t' = do + Text.Parsec.token + (\(Token t _) -> show t) + (\(Token _ (SourceSpan s1 _)) -> s1) + (\tok@(Token t _) -> if t == t' then Just tok else Nothing) + tok :: T -> P (Token SourceSpan) -tok t' = +tok t' = do + many comment Text.Parsec.token (\(Token t _) -> show t) (\(Token _ (SourceSpan s1 _)) -> s1) (\tok@(Token t _) -> if t == t' then Just tok else Nothing) parseFiddleText :: String -> Text -> F (FiddleUnit 'Stage1 F (Commented SourceSpan)) -parseFiddleText sourceName txt = runIdentity . - Text.Parsec.runParserT - (fiddleUnit <* eof) () sourceName =<< tokenize sourceName txt +parseFiddleText sourceName txt = + runIdentity + . Text.Parsec.runParserT + (fiddleUnit <* eof) + () + sourceName + . stripTrailingComments + =<< tokenize sourceName txt |