{-# LANGUAGE LambdaCase #-} {-# LANGUAGE ScopedTypeVariables #-} module Language.Fiddle.Parser ( fiddleUnit, parseFiddleText, ) where import Control.Monad (void) import Data.Functor.Identity import Data.Kind (Type) import Data.List.NonEmpty (NonEmpty (..)) import Data.Text (Text) import Language.Fiddle.Ast import Language.Fiddle.Tokenizer import Language.Fiddle.Types import Text.Parsec hiding (token) import qualified Text.Parsec import Text.Printf type F = Either ParseError type S = [Token SourceSpan] type P = ParsecT S () Identity type A = Commented SourceSpan type Pa (a :: Stage -> (Type -> Type) -> Type -> Type) = P (a 'Parsed F (Commented SourceSpan)) type PaS (a :: (Type -> Type) -> Type -> Type) = P (a F (Commented SourceSpan)) commentP :: P Comment commentP = token $ \case (TokComment c) -> Just (NormalComment c) (TokDocComment c) -> Just (DocComment c) _ -> Nothing isComment :: Token s -> Bool isComment (Token t _) = case t of (TokComment _) -> True (TokDocComment _) -> True _ -> False -- Removes trailing comments from a list of tokens. Comments that don't preceed -- an actual language token have minimal semantic value and are thus discarded. stripTrailingComments :: [Token s] -> [Token s] stripTrailingComments = reverse . dropWhile isComment . reverse directedP :: (Annotated (t Parsed)) => Pa t -> PaS (Directed t 'Parsed) directedP subparser = withMetaLeaveComments $ do coms <- many commentP Directed <$> many directiveP <*> pushComments coms subparser pushComments :: (Annotated t) => [Comment] -> PaS t -> PaS t pushComments coms subparse = do setAnnot (\(Commented coms' a) -> Commented (coms ++ coms') a) <$> subparse directiveP :: PaS Directive directiveP = withMeta $ Directive <$> defer directiveBodyTokens directiveBodyP directiveBodyP :: PaS DirectiveBody directiveBodyP = withMeta $ do DirectiveBody <$> many (directiveElementP <* (void (tok TokComma) <|> eof)) directiveElementP :: PaS DirectiveElement directiveElementP = withMeta $ do identifier1 <- nextTextP choice [ do tok_ TokColon let backend = identifier1 key <- nextTextP choice [ do tok_ TokEq DirectiveElementKeyValue (Just backend) key <$> directiveExpressionP, do return (DirectiveElementKey (Just backend) key) ], do tok_ TokEq let key = identifier1 DirectiveElementKeyValue Nothing key <$> directiveExpressionP, return $ DirectiveElementKey Nothing identifier1 ] nextTextP :: PaS Identifier nextTextP = withMeta $ Identifier <$> token textOf directiveExpressionP :: PaS DirectiveExpression directiveExpressionP = withMeta $ do choice [ do token $ \case (TokString str) -> Just $ DirectiveString str (TokLitNum num) -> Just $ DirectiveNumber num _ -> Nothing ] fiddleUnit :: Pa FiddleUnit fiddleUnit = do withMeta ( FiddleUnit () <$> many1 (directedP fiddleDeclP <* tok TokSemi) ) <* many commentP stringTokenP :: P Text stringTokenP = token ( \case (TokString str) -> Just str _ -> Nothing ) importListP :: PaS ImportList importListP = withMeta $ do tok_ TokLParen ImportList <$> many (ident <* (tok TokComma <|> lookAhead (tok TokRParen))) <* tok TokRParen importStatementP :: Pa ImportStatement importStatementP = withMeta $ ImportStatement <$> stringTokenP <*> optionMaybe importListP <*> pure () fiddleDeclP :: Pa FiddleDecl fiddleDeclP = do withMeta $ do t <- tokenType <$> anyToken case t of KWOption -> OptionDecl <$> nextTextP <*> nextTextP KWPackage -> PackageDecl (pure ()) <$> name <*> defer body packageBodyP KWUsing -> UsingDecl (Witness ()) <$> name KWLocation -> LocationDecl (pure ()) <$> ident <*> (tok TokEq >> expressionP) KWBits -> BitsDecl (pure ()) <$> ident <*> (tok TokColon >> bitTypeP) KWImport -> ImportDecl <$> importStatementP KWType -> ObjTypeDecl (pure ()) <$> ident <*> ( do tok_ TokColon bt <- bodyTypeP defer body (objTypeBodyP bt) ) KWInstance -> ObjectDecl (pure ()) <$> ident <*> (tok KWAt *> expressionP) <*> (tok TokColon *> objTypeP) _ -> fail $ printf "Unexpected token %s. Expected top-level declaration." (show t) objTypeP :: Pa ObjType objTypeP = do base <- withMeta baseObjP recur' <- recur return $ recur' base where recur :: P (ObjType Parsed F A -> ObjType Parsed F A) recur = ( do withMeta $ do expr <- tok TokLBracket *> expressionP <* tok TokRBracket recur' <- recur return (\met base -> recur' (ArrayObjType base expr met)) ) <|> return id baseObjP :: P (A -> ObjType Parsed F A) baseObjP = (ReferencedObjType (pure ()) <$> name) <|> ( do t <- bodyTypeP AnonymousObjType (Witness ()) <$> defer body (objTypeBodyP t) ) exprInParenP :: Pa Expression exprInParenP = tok TokLParen *> expressionP <* tok TokRParen objTypeBodyP :: BodyType F (Commented SourceSpan) -> Pa ObjTypeBody objTypeBodyP bt = withMeta $ ObjTypeBody bt <$> many (directedP objTypeDeclP <* tok TokSemi) objTypeDeclP :: Pa ObjTypeDecl objTypeDeclP = withMeta $ ( do tok_ KWAssertPos AssertPosStatement (Witness ()) <$> exprInParenP ) <|> ( do tok_ KWReserved ReservedDecl <$> exprInParenP ) <|> ( do bt <- bodyTypeP TypeSubStructure <$> defer body (objTypeBodyP bt) <*> optionMaybe ident ) <|> ( do modifier <- optionMaybe modifierP tok_ KWReg RegisterDecl () modifier <$> optionMaybe ident <*> exprInParenP <*> optionMaybe (tok TokColon *> registerBodyP) ) modifierP :: PaS Modifier modifierP = withMeta $ ModifierKeyword <$> choice [ tok KWRo >> return Ro, tok KWRw >> return Rw, tok KWWo >> return Wo ] bitBodyTypeP :: PaS BodyType bitBodyTypeP = withMeta $ (tok KWStruct >> return Struct) <|> (tok KWUnion >> return Union) bodyTypeP :: PaS BodyType bodyTypeP = withMeta $ (tok KWStruct >> return Struct) <|> (tok KWUnion >> return Union) registerBodyP :: Pa RegisterBody registerBodyP = withMeta $ RegisterBody <$> bitBodyTypeP <*> defer body deferredRegisterBodyP deferredRegisterBodyP :: Pa DeferredRegisterBody deferredRegisterBodyP = withMeta $ DeferredRegisterBody <$> many (directedP registerBitsDeclP <* tok TokSemi) registerBitsDeclP :: Pa RegisterBitsDecl registerBitsDeclP = withMeta $ ( do tok KWReserved >> ReservedBits <$> exprInParenP ) <|> (BitsSubStructure <$> registerBodyP <*> optionMaybe ident) <|> ( DefinedBits () <$> optionMaybe modifierP <*> ident <*> (tok TokColon >> registerBitsTypeRefP) ) registerBitsTypeRefP :: Pa RegisterBitsTypeRef registerBitsTypeRefP = do base <- baseTypeRef recur' <- recurP return (recur' base) where recurP :: P (RegisterBitsTypeRef Parsed F A -> RegisterBitsTypeRef Parsed F A) recurP = ( do withMeta $ do expr <- tok TokLBracket *> expressionP <* tok TokRBracket recur' <- recurP return (\met base -> recur' (RegisterBitsArray base expr met)) ) <|> return id baseTypeRef = withMeta $ (RegisterBitsJustBits <$> exprInParenP) <|> (RegisterBitsAnonymousType (Witness ()) <$> anonymousBitsTypeP) <|> (RegisterBitsReference (pure ()) <$> name) anonymousBitsTypeP :: Pa AnonymousBitsType anonymousBitsTypeP = withMeta $ do tok_ KWEnum AnonymousEnumBody <$> exprInParenP <*> defer body enumBodyP bitTypeP :: Pa BitType bitTypeP = withMeta $ rawBits <|> enumType where rawBits = RawBits <$> (tok TokLParen *> expressionP <* tok TokRParen) enumType = do tok_ KWEnum expr <- exprInParenP EnumBitType expr <$> defer body enumBodyP enumBodyP :: Pa EnumBody enumBodyP = withMeta $ EnumBody <$> many (directedP enumConstantDeclP <* tok TokComma) enumConstantDeclP :: Pa EnumConstantDecl enumConstantDeclP = withMeta $ (tok KWReserved >> EnumConstantReserved <$> (tok TokEq >> expressionP)) <|> (EnumConstantDecl <$> ident <*> (tok TokEq >> expressionP)) expressionP :: Pa Expression expressionP = withMeta $ token ( \case (TokLitNum num) -> Just (LitNum num) _ -> Nothing ) <|> (Var <$> name) body :: P [Token SourceSpan] body = do (_, b, _) <- body' return b directiveBodyTokens :: P [Token SourceSpan] directiveBodyTokens = do _ <- tokKeepComment TokDirectiveStart ret <- concat <$> manyTill ((: []) <$> anyToken) (lookAhead $ tokKeepComment TokDirectiveEnd) _ <- tokKeepComment TokDirectiveEnd return ret body' :: P (Token SourceSpan, [Token SourceSpan], Token SourceSpan) body' = do l <- tokKeepComment TokLBrace ret <- concat <$> manyTill ( ((\(b0, b1, b2) -> [b0] ++ b1 ++ [b2]) <$> body') <|> fmap (: []) anyToken ) (lookAhead $ tokKeepComment TokRBrace) r <- tokKeepComment TokRBrace _ <- lookAhead anyToken return (l, stripTrailingComments ret, r) -- A deferred parsing takes a part of a text file (such as a body) and returns a -- deferred computation for parsing that section. -- -- This is useful because it allows for parse errors to be detected in multiple -- locations. This is because for things like bodies (stuff inside { ... }), we -- can parse the stuff inside the body as it's own, separate parsing. defer :: P [Token SourceSpan] -> P b -> P (F b) defer p0 pb = do sourcePos <- getPosition Text.Parsec.runParser ( do setPosition sourcePos pb <* eof ) () (sourceName sourcePos) <$> p0 packageBodyP :: Pa PackageBody packageBodyP = withMetaLeaveComments $ PackageBody <$> many ( directedP $ fiddleDeclP <* ( tok TokSemi <|> fail "Expected ';'" ) ) ident :: PaS Identifier ident = withMeta $ token $ \case (TokIdent identTok) -> Just (Identifier identTok) _ -> Nothing name :: PaS Name name = withMeta $ do i <- ident is <- many $ do tok_ TokDot ident return $ Name (i :| is) -- Takes a some parsable thing p and automatically parses the comments before -- and after and sets the positions and adds it to the annotation. withMeta :: P (Commented SourceSpan -> b) -> P b withMeta p = do comments' <- many commentP start <- getPosition fn <- p end <- getPosition return $ fn (Commented comments' (SourceSpan start end)) -- Takes a some parsable thing p and automatically parses the comments before -- and after and sets the positions and adds it to the annotation. withMetaLeaveComments :: P (Commented SourceSpan -> b) -> P b withMetaLeaveComments p = do start <- getPosition fn <- p end <- getPosition return $ fn (Commented [] (SourceSpan start end)) token :: (T -> Maybe a) -> ParsecT S u Identity a token fn = Text.Parsec.token (\(Token t _) -> show t) (\(Token _ (SourceSpan s1 _)) -> s1) (\(Token t _) -> fn t) tokKeepComment :: T -> P (Token SourceSpan) tokKeepComment t' = do Text.Parsec.token (\(Token t _) -> show t) (\(Token _ (SourceSpan s1 _)) -> s1) (\aToken@(Token t _) -> if t == t' then Just aToken else Nothing) tok_ :: T -> P () tok_ = void . tok tok :: T -> P (Token SourceSpan) tok t' = do _ <- many commentP Text.Parsec.token (\(Token t _) -> show t) (\(Token _ (SourceSpan s1 _)) -> s1) (\tt@(Token t _) -> if t == t' then Just tt else Nothing) parseFiddleText :: String -> Text -> F (FiddleUnit 'Parsed F (Commented SourceSpan)) parseFiddleText srcName txt = runIdentity . Text.Parsec.runParserT (fiddleUnit <* eof) () srcName . stripTrailingComments =<< tokenize srcName txt