{-# LANGUAGE LambdaCase #-} {-# LANGUAGE ScopedTypeVariables #-} module Language.Fiddle.Parser ( fiddleUnit, parseFiddleText, ) where import Data.Functor.Identity import Data.Text (Text) import qualified Data.Text import Language.Fiddle.Ast import Language.Fiddle.Tokenizer import Language.Fiddle.Types import Text.Parsec hiding (token) import qualified Text.Parsec type F = Either ParseError type S = [Token SourceSpan] type P = ParsecT S () Identity type Pa (a :: Stage -> (* -> *) -> * -> *) = P (a 'Stage1 F (Commented SourceSpan)) fiddleUnit :: Pa FiddleUnit fiddleUnit = withMeta $ FiddleUnit <$> many (fiddleDecl <* tok TokSemi) fiddleDecl :: Pa FiddleDecl fiddleDecl = withMeta $ choice [ tok KWOption >> OptionDecl <$> ident <*> ident, tok KWPackage >> PackageDecl <$> ident <*> defer body packageBody, tok KWLocation >> LocationDecl <$> ident <*> (tok TokEq >> expression), tok KWBits >> BitsDecl <$> ident <*> (tok TokColon >> bitType), tok KWObjtype >> ObjTypeDecl <$> ident <*> (tok TokColon >> defer body objTypeBody), tok KWObject >> ObjectDecl <$> ident <*> (tok KWAt *> expression) <*> (tok TokColon *> objType) ] objType :: Pa ObjType objType = recur objType' where recur b = do recur ( try $ withMeta $ do t <- b e <- tok TokLBrace *> expression <* tok TokRBrace return (ArrayObjType t e) ) <|> b objType' = withMeta $ (ReferencedObjType <$> ident) <|> (AnonymousObjType <$> defer body objTypeBody) exprInParen :: P (Expression Stage1 (Commented SourceSpan)) exprInParen = tok TokLParen *> expression <* tok TokRParen objTypeBody :: Pa ObjTypeBody objTypeBody = withMeta $ withinBody $ ObjTypeBody <$> many (objTypeDecl <* tok TokSemi) objTypeDecl :: Pa ObjTypeDecl objTypeDecl = withMeta $ ( do tok KWAssertPos AssertPosStatement <$> exprInParen ) <|> ( do tok KWReg RegisterDecl <$> optionMaybe modifier <*> optionMaybe ident <*> exprInParen <*> optionMaybe (tok TokColon *> registerBody) ) modifier :: P (Modifier (Commented SourceSpan)) modifier = withMeta $ ModifierKeyword <$> choice [ tok KWRo >> return Ro, tok KWRw >> return Rw, tok KWWo >> return Wo ] registerBody :: Pa RegisterBody registerBody = withMeta $ RegisterBody <$> defer body deferredRegisterBody deferredRegisterBody :: Pa DeferredRegisterBody deferredRegisterBody = withMeta $ withinBody $ DeferredRegisterBody <$> many (registerBitsDecl <* tok TokSemi) registerBitsDecl :: Pa RegisterBitsDecl registerBitsDecl = withMeta $ ( do tok KWReserved >> ReservedBits <$> exprInParen ) <|> ( DefinedBits <$> optionMaybe modifier <*> ident <*> registerBitsTypeRef ) registerBitsTypeRef :: Pa RegisterBitsTypeRef registerBitsTypeRef = recur typeRef where recur b = do recur ( try $ withMeta $ do t <- b e <- tok TokLBrace *> expression <* tok TokRBrace return (RegisterBitsArray t e) ) <|> b typeRef = withMeta $ (tok KWEnum >> RegisterBitsAnonymousType <$> anonymousBitsType) <|> (RegisterBitsReference <$> ident) anonymousBitsType :: P (AnonymousBitsType F (Commented SourceSpan)) anonymousBitsType = withMeta $ do tok KWEnum AnonymousEnumBody <$> exprInParen <*> defer body enumBody bitType :: Pa BitType bitType = withMeta $ rawBits <|> enumType where rawBits = RawBits <$> (tok TokLParen *> expression <* tok TokRParen) enumType = do tok KWEnum expr <- exprInParen EnumBitType expr <$> defer body enumBody enumBody :: Pa EnumBody enumBody = withMeta $ withinBody $ EnumBody <$> many enumConstantDecl enumConstantDecl :: P (EnumConstantDecl Stage1 (Commented SourceSpan)) enumConstantDecl = withMeta $ (tok KWReserved >> EnumConstantReserved <$> exprInParen) <|> (EnumConstantDecl <$> ident <*> (tok TokEq >> expression)) expression :: P (Expression 'Stage1 (Commented SourceSpan)) expression = withMeta $ token $ \case (TokLitNum num) -> Just (LitNum num) (TokIdent i) -> Just $ \(Commented cs s) -> Var (Identifier i (Commented [] s)) (Commented cs s) _ -> Nothing body :: P [Token SourceSpan] body = do t0 <- tok TokLBrace ret <- concat <$> manyTill ( body <|> fmap (: []) anyToken ) (lookAhead $ tok TokRBrace) return $ t0 : ret -- Parses something within braces. withinBody :: P a -> P a withinBody p = tok TokLBrace *> p <* tok TokRBrace -- A deferred parsing takes a part of a text file (such as a body) and returns a -- deferred computation for parsing that section. -- -- This is useful because it allows for parse errors to be detected in multiple -- locations. This is because for things like bodies (stuff inside { ... }), we -- can parse the stuff inside the body as it's own, separate parsing. defer :: P [Token SourceSpan] -> P b -> P (F b) defer p0 pb = do contents <- p0 sourcePos <- getPosition return $ Text.Parsec.runParser ( do setPosition sourcePos pb <* eof ) () (sourceName sourcePos) contents packageBody :: Pa PackageBody packageBody = withMeta $ withinBody $ PackageBody <$> many (fiddleDecl <* tok TokSemi) ident :: P (Identifier (Commented SourceSpan)) ident = withMeta $ token $ \case (TokIdent id) -> Just (Identifier id) _ -> Nothing -- Takes a some parsable thing p and automatically parses the comments before -- and after and sets the positions and adds it to the annotation. withMeta :: P (Commented SourceSpan -> b) -> P b withMeta p = try $ do comments <- many comment (Token _ (SourceSpan start _)) <- lookAhead anyToken fn <- p (Token _ (SourceSpan end _)) <- lookAhead anyToken return $ fn (Commented comments (SourceSpan start end)) where comment :: P Comment comment = token $ \case (TokComment c) -> Just (NormalComment c) (TokDocComment c) -> Just (DocComment c) _ -> Nothing token :: (T -> Maybe a) -> ParsecT S u Identity a token fn = Text.Parsec.token (\(Token t _) -> show t) (\(Token _ (SourceSpan s1 _)) -> s1) (\(Token t _) -> fn t) tok :: T -> P (Token SourceSpan) tok t' = Text.Parsec.token (\(Token t _) -> show t) (\(Token _ (SourceSpan s1 _)) -> s1) (\tok@(Token t _) -> if t == t' then Just tok else Nothing) parseFiddleText :: String -> Text -> F (FiddleUnit 'Stage1 F (Commented SourceSpan)) parseFiddleText sourceName txt = runIdentity $ do Text.Parsec.runParserT fiddleUnit () sourceName []