{-# LANGUAGE LambdaCase #-} {-# LANGUAGE ScopedTypeVariables #-} module Language.Fiddle.Parser ( fiddleUnit, parseFiddleText, ) where import Control.Monad (void) import Data.Functor.Identity import Data.Kind (Type) import Data.Text (Text) import Language.Fiddle.Ast import Language.Fiddle.Tokenizer import Language.Fiddle.Types import Text.Parsec hiding (token) import qualified Text.Parsec import Text.Printf type F = Either ParseError type S = [Token SourceSpan] type P = ParsecT S () Identity type A = Commented SourceSpan type Pa (a :: Stage -> (Type -> Type) -> Type -> Type) = P (a 'Parsed F (Commented SourceSpan)) type PaS (a :: (Type -> Type) -> Type -> Type) = P (a F (Commented SourceSpan)) noQMd :: F (QMd Parsed t) noQMd = pure Vacant commentP :: P Comment commentP = token $ \case (TokComment c) -> Just (NormalComment c) (TokDocComment c) -> Just (DocComment c) _ -> Nothing isComment :: Token s -> Bool isComment (Token t _) = case t of (TokComment _) -> True (TokDocComment _) -> True _ -> False -- Removes trailing comments from a list of tokens. Comments that don't preceed -- an actual language token have minimal semantic value and are thus discarded. stripTrailingComments :: [Token s] -> [Token s] stripTrailingComments = reverse . dropWhile isComment . reverse directedP :: (Annotated (t Parsed)) => Pa t -> PaS (Directed t 'Parsed) directedP subparser = withMetaLeaveComments $ do coms <- many commentP Directed <$> many directiveP <*> pushComments coms subparser pushComments :: (Annotated t) => [Comment] -> PaS t -> PaS t pushComments coms subparse = do setAnnot (\(Commented coms' a) -> Commented (coms ++ coms') a) <$> subparse directiveP :: PaS Directive directiveP = withMeta $ Directive <$> defer directiveBodyTokens directiveBodyP directiveBodyP :: PaS DirectiveBody directiveBodyP = withMeta $ do DirectiveBody <$> many (directiveElementP <* (void (tok TokComma) <|> eof)) directiveElementP :: PaS DirectiveElement directiveElementP = withMeta $ do identifier1 <- nextTextP choice [ do tok_ TokColon let backend = identifier1 key <- nextTextP choice [ do tok_ TokEq DirectiveElementKeyValue (Just backend) key <$> directiveExpressionP, do return (DirectiveElementKey (Just backend) key) ], do tok_ TokEq let key = identifier1 DirectiveElementKeyValue Nothing key <$> directiveExpressionP, return $ DirectiveElementKey Nothing identifier1 ] nextTextP :: PaS Identifier nextTextP = withMeta $ Identifier <$> token textOf directiveExpressionP :: PaS DirectiveExpression directiveExpressionP = withMeta $ do choice [ do token $ \case (TokString str) -> Just $ DirectiveString str (TokLitNum num) -> Just $ DirectiveNumber num _ -> Nothing ] fiddleUnit :: Pa FiddleUnit fiddleUnit = do withMeta ( FiddleUnit Vacant <$> many1 (directedP fiddleDeclP <* tok TokSemi) ) <* many commentP stringTokenP :: P Text stringTokenP = token ( \case (TokString str) -> Just str _ -> Nothing ) importListP :: PaS ImportList importListP = withMeta $ do tok_ TokLParen ImportList <$> many (ident <* (tok TokComma <|> lookAhead (tok TokRParen))) <* tok TokRParen importStatementP :: Pa ImportStatement importStatementP = withMeta $ ImportStatement <$> stringTokenP <*> optionMaybe importListP <*> pure Vacant nameToIdent :: PaS Identifier -> PaS Name nameToIdent ifn = withMeta $ Name . pure <$> ident fiddleDeclP :: Pa FiddleDecl fiddleDeclP = do withMeta $ do t <- tokenType <$> anyToken case t of KWOption -> OptionDecl <$> nextTextP <*> nextTextP KWPackage -> PackageDecl noQMd <$> name <*> defer body packageBodyP KWUsing -> UsingDecl Witness <$> name KWLocation -> LocationDecl noQMd <$> ident <*> (tok TokEq >> constExpressionP) KWBits -> BitsDecl noQMd <$> nameToIdent ident <*> (tok TokColon >> bitTypeP) KWImport -> ImportDecl <$> importStatementP KWType -> ObjTypeDecl noQMd <$> nameToIdent ident <*> ( do tok_ TokColon bt <- bodyTypeP defer body (objTypeBodyP bt) ) KWInstance -> ObjectDecl noQMd <$> ident <*> (tok KWAt *> expressionP) <*> (tok TokColon *> objTypeP) _ -> fail $ printf "Unexpected token %s. Expected top-level declaration." (show t) objTypeP :: Pa ObjType objTypeP = do base <- withMeta baseObjP recur' <- recur return $ recur' base where recur :: P (ObjType Parsed F A -> ObjType Parsed F A) recur = ( do withMeta $ do expr <- tok TokLBracket *> constExpressionP <* tok TokRBracket recur' <- recur return (\met base -> recur' (ArrayObjType base expr met)) ) <|> return id baseObjP :: P (A -> ObjType Parsed F A) baseObjP = (ReferencedObjType noQMd <$> name) <|> ( do t <- bodyTypeP AnonymousObjType Witness <$> defer body (objTypeBodyP t) ) asConstP :: Pa (Expression u) -> Pa (ConstExpression u) asConstP fn = withMeta $ ConstExpression . RightV <$> fn exprInParenP :: Pa (Expression u) exprInParenP = tok TokLParen *> expressionP <* tok TokRParen inParenP :: Pa a -> Pa a inParenP p = tok TokLParen *> p <* tok TokRParen objTypeBodyP :: BodyType F (Commented SourceSpan) -> Pa ObjTypeBody objTypeBodyP bt = withMeta $ ObjTypeBody bt <$> many (directedP objTypeDeclP <* tok TokSemi) objTypeDeclP :: Pa ObjTypeDecl objTypeDeclP = withMeta $ ( do tok_ KWAssertPos AssertPosStatement Witness <$> exprInParenP ) <|> ( do tok_ KWSkipTo SkipToStatement Witness Vacant <$> inParenP constExpressionP ) <|> ( do tok_ KWBuffer BufferDecl Vacant . Perhaps <$> optionMaybe ident <*> inParenP constExpressionP ) <|> ( do tok_ KWReserved ReservedDecl Witness <$> exprInParenP ) <|> ( do bt <- bodyTypeP TypeSubStructure <$> defer body (objTypeBodyP bt) <*> optionMaybe ident ) <|> ( do modifier <- Perhaps <$> optionMaybe modifierP tok_ KWReg RegisterDecl Vacant modifier . Perhaps <$> optionMaybe ident <*> fmap RightV exprInParenP <*> (Present . RegisterPosition <$> optionMaybe (tok TokAtSign >> litNumP)) <*> optionMaybe (tok TokColon *> registerBodyP) ) modifierP :: PaS Modifier modifierP = withMeta $ ModifierKeyword <$> choice [ tok KWRo >> return Ro, tok KWRw >> return Rw, tok KWWo >> return Wo ] bitBodyTypeP :: PaS BodyType bitBodyTypeP = withMeta $ (tok KWStruct >> return Struct) <|> (tok KWUnion >> return Union) bodyTypeP :: PaS BodyType bodyTypeP = withMeta $ (tok KWStruct >> return Struct) <|> (tok KWUnion >> return Union) registerBodyP :: Pa RegisterBody registerBodyP = withMeta $ RegisterBody <$> bitBodyTypeP <*> defer body deferredRegisterBodyP deferredRegisterBodyP :: Pa DeferredRegisterBody deferredRegisterBodyP = withMetaLeaveComments $ DeferredRegisterBody <$> many (directedP registerBitsDeclP <* tok TokSemi) registerBitsDeclP :: Pa RegisterBitsDecl registerBitsDeclP = withMeta $ ( do tok KWReserved >> ReservedBits <$> exprInParenP ) <|> ( do mod <- optionMaybe modifierP ( BitsSubStructure Vacant (Perhaps mod) <$> registerBodyP <*> optionMaybe ident ) <|> ( DefinedBits Vacant (Perhaps mod) <$> ident <*> (tok TokColon >> registerBitsTypeRefP) ) ) registerBitsTypeRefP :: Pa RegisterBitsTypeRef registerBitsTypeRefP = do base <- baseTypeRef recur' <- recurP return (recur' base) where recurP :: P (RegisterBitsTypeRef Parsed F A -> RegisterBitsTypeRef Parsed F A) recurP = ( do withMeta $ do expr <- tok TokLBracket *> constExpressionP <* tok TokRBracket recur' <- recurP return (\met base -> recur' (RegisterBitsArray base expr met)) ) <|> return id baseTypeRef = withMeta $ (RegisterBitsJustBits <$> asConstP exprInParenP) <|> (RegisterBitsAnonymousType Witness <$> anonymousBitsTypeP) <|> (RegisterBitsReference noQMd <$> name) anonymousBitsTypeP :: Pa AnonymousBitsType anonymousBitsTypeP = withMeta $ do tok_ KWEnum AnonymousEnumBody <$> exprInParenP <*> defer body enumBodyP bitTypeP :: Pa BitType bitTypeP = withMeta $ rawBits <|> enumType where rawBits = RawBits <$> (tok TokLParen *> expressionP <* tok TokRParen) enumType = do tok_ KWEnum expr <- exprInParenP EnumBitType expr <$> defer body enumBodyP enumBodyP :: Pa EnumBody enumBodyP = withMeta $ EnumBody <$> many (directedP enumConstantDeclP <* tok TokComma) enumConstantDeclP :: Pa EnumConstantDecl enumConstantDeclP = withMeta $ (tok KWReserved >> EnumConstantReserved <$> (tok TokEq >> expressionP)) <|> (EnumConstantDecl <$> ident <*> (tok TokEq >> constExpressionP)) constExpressionP :: Pa (ConstExpression u) constExpressionP = withMeta $ ConstExpression . RightV <$> expressionP litNumP :: P Text litNumP = token ( \case (TokLitNum num) -> Just num _ -> Nothing ) expressionP :: Pa (Expression u) expressionP = withMeta $ (LitNum . LeftV <$> litNumP) <|> (Var <$> name) body :: P [Token SourceSpan] body = do (_, b, _) <- body' return b directiveBodyTokens :: P [Token SourceSpan] directiveBodyTokens = do _ <- tokKeepComment TokDirectiveStart ret <- concat <$> manyTill ((: []) <$> anyToken) (lookAhead $ tokKeepComment TokDirectiveEnd) _ <- tokKeepComment TokDirectiveEnd return ret body' :: P (Token SourceSpan, [Token SourceSpan], Token SourceSpan) body' = do l <- tokKeepComment TokLBrace ret <- concat <$> manyTill ( ((\(b0, b1, b2) -> [b0] ++ b1 ++ [b2]) <$> body') <|> fmap (: []) anyToken ) (lookAhead $ tokKeepComment TokRBrace) r <- tokKeepComment TokRBrace _ <- lookAhead anyToken return (l, stripTrailingComments ret, r) -- A deferred parsing takes a part of a text file (such as a body) and returns a -- deferred computation for parsing that section. -- -- This is useful because it allows for parse errors to be detected in multiple -- locations. This is because for things like bodies (stuff inside { ... }), we -- can parse the stuff inside the body as it's own, separate parsing. defer :: P [Token SourceSpan] -> P b -> P (F b) defer p0 pb = do sourcePos <- getPosition Text.Parsec.runParser ( do setPosition sourcePos pb <* eof ) () (sourceName sourcePos) <$> p0 packageBodyP :: Pa PackageBody packageBodyP = withMetaLeaveComments $ PackageBody <$> many ( directedP $ fiddleDeclP <* ( tok TokSemi <|> fail "Expected ';'" ) ) ident :: PaS Identifier ident = withMeta $ token $ \case (TokIdent identTok) -> Just (Identifier identTok) _ -> Nothing name :: PaS Name name = withMeta $ do i <- ident is <- many $ do tok_ TokDot ident return $ Name (i :| is) -- Takes a some parsable thing p and automatically parses the comments before -- and after and sets the positions and adds it to the annotation. withMeta :: P (Commented SourceSpan -> b) -> P b withMeta p = do comments' <- many commentP start <- getPosition fn <- p end <- getPosition return $ fn (Commented comments' (SourceSpan start end)) -- Takes a some parsable thing p and automatically parses the comments before -- and after and sets the positions and adds it to the annotation. withMetaLeaveComments :: P (Commented SourceSpan -> b) -> P b withMetaLeaveComments p = do start <- getPosition fn <- p end <- getPosition return $ fn (Commented [] (SourceSpan start end)) token :: (T -> Maybe a) -> ParsecT S u Identity a token fn = Text.Parsec.token (\(Token t _) -> show t) (\(Token _ (SourceSpan s1 _)) -> s1) (\(Token t _) -> fn t) tokKeepComment :: T -> P (Token SourceSpan) tokKeepComment t' = do Text.Parsec.token (\(Token t _) -> show t) (\(Token _ (SourceSpan s1 _)) -> s1) (\aToken@(Token t _) -> if t == t' then Just aToken else Nothing) tok_ :: T -> P () tok_ = void . tok tok :: T -> P (Token SourceSpan) tok t' = do _ <- many commentP Text.Parsec.token (\(Token t _) -> show t) (\(Token _ (SourceSpan s1 _)) -> s1) (\tt@(Token t _) -> if t == t' then Just tt else Nothing) parseFiddleText :: String -> Text -> F (FiddleUnit 'Parsed F (Commented SourceSpan)) parseFiddleText srcName txt = runIdentity . Text.Parsec.runParserT (fiddleUnit <* eof) () srcName . stripTrailingComments =<< tokenize srcName txt