{-# LANGUAGE LambdaCase #-} {-# LANGUAGE ScopedTypeVariables #-} module Language.Fiddle.Parser ( fiddleUnit, parseFiddleText, ) where import Control.Monad (void) import Data.Functor.Identity import Data.Kind (Type) import Data.List.NonEmpty (NonEmpty (..)) import Data.Text (Text) import qualified Data.Text import Debug.Trace import Language.Fiddle.Ast import Language.Fiddle.Tokenizer import Language.Fiddle.Types import Text.Parsec hiding (token) import qualified Text.Parsec import Text.Printf type F = Either ParseError type S = [Token SourceSpan] type P = ParsecT S () Identity type A = Commented SourceSpan type Pa (a :: Stage -> (Type -> Type) -> Type -> Type) = P (a 'Parsed F (Commented SourceSpan)) type PaS (a :: (Type -> Type) -> Type -> Type) = P (a F (Commented SourceSpan)) commentP :: P Comment commentP = token $ \case (TokComment c) -> Just (NormalComment c) (TokDocComment c) -> Just (DocComment c) _ -> Nothing isComment :: Token s -> Bool isComment (Token t _) = case t of (TokComment _) -> True (TokDocComment _) -> True _ -> False -- Removes trailing comments from a list of tokens. Comments that don't preceed -- an actual language token have minimal semantic value and are thus discarded. stripTrailingComments :: [Token s] -> [Token s] stripTrailingComments = reverse . dropWhile isComment . reverse directedP :: (Annotated (t Parsed)) => Pa t -> PaS (Directed t 'Parsed) directedP subparser = withMetaLeaveComments $ do comments <- many commentP Directed <$> many directiveP <*> pushComments comments subparser pushComments :: (Annotated t) => [Comment] -> PaS t -> PaS t pushComments comments subparse = do setAnnot (\(Commented coms a) -> Commented (comments ++ coms) a) <$> subparse directiveP :: PaS Directive directiveP = withMeta $ Directive <$> defer directiveBodyTokens directiveBodyP directiveBodyP :: PaS DirectiveBody directiveBodyP = withMeta $ do DirectiveBody <$> many (directiveElementP <* (void (tok TokComma) <|> eof)) directiveElementP :: PaS DirectiveElement directiveElementP = withMeta $ do identifier1 <- nextTextP choice [ do tok TokColon let backend = identifier1 key <- nextTextP choice [ do tok TokEq DirectiveElementKeyValue (Just backend) key <$> directiveExpressionP, do return (DirectiveElementKey (Just backend) key) ], do tok TokEq let key = identifier1 DirectiveElementKeyValue Nothing key <$> directiveExpressionP, return $ DirectiveElementKey Nothing identifier1 ] nextTextP :: PaS Identifier nextTextP = withMeta $ Identifier <$> token textOf directiveExpressionP :: PaS DirectiveExpression directiveExpressionP = withMeta $ do choice [ do token $ \case (TokString str) -> Just $ DirectiveString str (TokLitNum num) -> Just $ DirectiveNumber num _ -> Nothing ] fiddleUnit :: Pa FiddleUnit fiddleUnit = do withMeta ( FiddleUnit () <$> many1 (directedP fiddleDeclP <* tok TokSemi) ) <* many commentP stringTokenP :: P Text stringTokenP = token ( \case (TokString str) -> Just str _ -> Nothing ) importListP :: PaS ImportList importListP = withMeta $ do tok TokLParen ImportList <$> many (ident <* (tok TokComma <|> lookAhead (tok TokRParen))) <* tok TokRParen importStatementP :: Pa ImportStatement importStatementP = withMeta $ ImportStatement <$> stringTokenP <*> optionMaybe importListP <*> pure () fiddleDeclP :: Pa FiddleDecl fiddleDeclP = do withMeta $ do t <- tokenType <$> anyToken case t of KWOption -> OptionDecl <$> nextTextP <*> nextTextP KWPackage -> PackageDecl () <$> name <*> defer body packageBodyP KWUsing -> UsingDecl (Witness ()) <$> name KWLocation -> LocationDecl () <$> ident <*> (tok TokEq >> expressionP) KWBits -> BitsDecl () <$> ident <*> (tok TokColon >> bitTypeP) KWImport -> ImportDecl <$> importStatementP KWType -> ObjTypeDecl () <$> ident <*> ( do tok TokColon bt <- bodyTypeP defer body (objTypeBodyP bt) ) KWInstance -> ObjectDecl () <$> ident <*> (tok KWAt *> expressionP) <*> (tok TokColon *> objTypeP) _ -> fail $ printf "Unexpected token %s. Expected top-level declaration." (show t) objTypeP :: Pa ObjType objTypeP = do base <- withMeta baseObjP recur' <- recur return $ recur' base where recur :: P (ObjType Parsed F A -> ObjType Parsed F A) recur = ( do withMeta $ do expr <- tok TokLBracket *> expressionP <* tok TokRBracket recur' <- recur return (\met base -> recur' (ArrayObjType base expr met)) ) <|> return id baseObjP :: P (A -> ObjType Parsed F A) baseObjP = (ReferencedObjType () <$> name) <|> ( do t <- bodyTypeP AnonymousObjType (Witness ()) <$> defer body (objTypeBodyP t) ) exprInParenP :: Pa Expression exprInParenP = tok TokLParen *> expressionP <* tok TokRParen objTypeBodyP :: BodyType F (Commented SourceSpan) -> Pa ObjTypeBody objTypeBodyP bt = withMeta $ ObjTypeBody bt <$> many (directedP objTypeDeclP <* tok TokSemi) objTypeDeclP :: Pa ObjTypeDecl objTypeDeclP = withMeta $ ( do tok KWAssertPos AssertPosStatement (Witness ()) <$> exprInParenP ) <|> ( do tok KWReserved ReservedDecl <$> exprInParenP ) <|> ( do bt <- bodyTypeP TypeSubStructure <$> defer body (objTypeBodyP bt) <*> optionMaybe ident ) <|> ( do mod <- optionMaybe modifierP tok KWReg RegisterDecl mod <$> optionMaybe ident <*> exprInParenP <*> optionMaybe (tok TokColon *> registerBodyP) ) modifierP :: PaS Modifier modifierP = withMeta $ ModifierKeyword <$> choice [ tok KWRo >> return Ro, tok KWRw >> return Rw, tok KWWo >> return Wo ] bitBodyTypeP :: PaS BodyType bitBodyTypeP = withMeta $ (tok KWStruct >> return Struct) <|> (tok KWUnion >> return Union) bodyTypeP :: PaS BodyType bodyTypeP = withMeta $ (tok KWStruct >> return Struct) <|> (tok KWUnion >> return Union) registerBodyP :: Pa RegisterBody registerBodyP = withMeta $ RegisterBody <$> bitBodyTypeP <*> defer body deferredRegisterBodyP deferredRegisterBodyP :: Pa DeferredRegisterBody deferredRegisterBodyP = withMeta $ DeferredRegisterBody <$> many (directedP registerBitsDeclP <* tok TokSemi) registerBitsDeclP :: Pa RegisterBitsDecl registerBitsDeclP = withMeta $ ( do tok KWReserved >> ReservedBits <$> exprInParenP ) <|> (BitsSubStructure <$> registerBodyP <*> optionMaybe ident) <|> ( DefinedBits <$> optionMaybe modifierP <*> ident <*> (tok TokColon >> registerBitsTypeRefP) ) registerBitsTypeRefP :: Pa RegisterBitsTypeRef registerBitsTypeRefP = do base <- baseTypeRef recur' <- recurP return (recur' base) where recurP :: P (RegisterBitsTypeRef Parsed F A -> RegisterBitsTypeRef Parsed F A) recurP = ( do withMeta $ do expr <- tok TokLBracket *> expressionP <* tok TokRBracket recur' <- recurP return (\met base -> recur' (RegisterBitsArray base expr met)) ) <|> return id baseTypeRef = withMeta $ (RegisterBitsJustBits <$> exprInParenP) <|> (RegisterBitsAnonymousType (Witness ()) <$> anonymousBitsTypeP) <|> (RegisterBitsReference () <$> name) anonymousBitsTypeP :: Pa AnonymousBitsType anonymousBitsTypeP = withMeta $ do tok KWEnum AnonymousEnumBody <$> exprInParenP <*> defer body enumBodyP bitTypeP :: Pa BitType bitTypeP = withMeta $ rawBits <|> enumType where rawBits = RawBits <$> (tok TokLParen *> expressionP <* tok TokRParen) enumType = do tok KWEnum expr <- exprInParenP EnumBitType expr <$> defer body enumBodyP enumBodyP :: Pa EnumBody enumBodyP = withMeta $ EnumBody <$> many (directedP enumConstantDeclP <* tok TokComma) enumConstantDeclP :: Pa EnumConstantDecl enumConstantDeclP = withMeta $ (tok KWReserved >> EnumConstantReserved <$> (tok TokEq >> expressionP)) <|> (EnumConstantDecl <$> ident <*> (tok TokEq >> expressionP)) expressionP :: Pa Expression expressionP = withMeta $ token $ \case (TokLitNum num) -> Just (LitNum num) (TokIdent i) -> Just $ \(Commented cs s) -> Var (Identifier i (Commented [] s)) (Commented cs s) _ -> Nothing body :: P [Token SourceSpan] body = do (_, b, _) <- body' return b directiveBodyTokens :: P [Token SourceSpan] directiveBodyTokens = do tokKeepComment TokDirectiveStart ret <- concat <$> manyTill ((: []) <$> anyToken) (lookAhead $ tokKeepComment TokDirectiveEnd) tokKeepComment TokDirectiveEnd return ret body' :: P (Token SourceSpan, [Token SourceSpan], Token SourceSpan) body' = do l <- tokKeepComment TokLBrace ret <- concat <$> manyTill ( ((\(b0, b1, b2) -> [b0] ++ b1 ++ [b2]) <$> body') <|> fmap (: []) anyToken ) (lookAhead $ tokKeepComment TokRBrace) r <- tokKeepComment TokRBrace next <- lookAhead anyToken return (l, stripTrailingComments ret, r) -- A deferred parsing takes a part of a text file (such as a body) and returns a -- deferred computation for parsing that section. -- -- This is useful because it allows for parse errors to be detected in multiple -- locations. This is because for things like bodies (stuff inside { ... }), we -- can parse the stuff inside the body as it's own, separate parsing. defer :: P [Token SourceSpan] -> P b -> P (F b) defer p0 pb = do sourcePos <- getPosition Text.Parsec.runParser ( do setPosition sourcePos pb <* eof ) () (sourceName sourcePos) <$> p0 packageBodyP :: Pa PackageBody packageBodyP = withMetaLeaveComments $ PackageBody <$> many ( directedP $ fiddleDeclP <* ( tok TokSemi <|> fail "Expected ';'" ) ) printNext :: P () printNext = do t <- lookAhead anyToken traceM $ "NextToken: " ++ show t return () ident :: PaS Identifier ident = withMeta $ token $ \case (TokIdent id) -> Just (Identifier id) _ -> Nothing name :: PaS Name name = withMeta $ do i <- ident is <- many $ do tok TokDot ident return $ Name (i :| is) -- Takes a some parsable thing p and automatically parses the comments before -- and after and sets the positions and adds it to the annotation. withMeta :: P (Commented SourceSpan -> b) -> P b withMeta p = do comments <- many commentP start <- getPosition fn <- p end <- getPosition return $ fn (Commented comments (SourceSpan start end)) -- Takes a some parsable thing p and automatically parses the comments before -- and after and sets the positions and adds it to the annotation. withMetaLeaveComments :: P (Commented SourceSpan -> b) -> P b withMetaLeaveComments p = do start <- getPosition fn <- p end <- getPosition return $ fn (Commented [] (SourceSpan start end)) token :: (T -> Maybe a) -> ParsecT S u Identity a token fn = Text.Parsec.token (\(Token t _) -> show t) (\(Token _ (SourceSpan s1 _)) -> s1) (\(Token t _) -> fn t) tokKeepComment :: T -> P (Token SourceSpan) tokKeepComment t' = do Text.Parsec.token (\(Token t _) -> show t) (\(Token _ (SourceSpan s1 _)) -> s1) (\tok@(Token t _) -> if t == t' then Just tok else Nothing) tok :: T -> P (Token SourceSpan) tok t' = do many commentP Text.Parsec.token (\(Token t _) -> show t) (\(Token _ (SourceSpan s1 _)) -> s1) (\tok@(Token t _) -> if t == t' then Just tok else Nothing) parseFiddleText :: String -> Text -> F (FiddleUnit 'Parsed F (Commented SourceSpan)) parseFiddleText sourceName txt = runIdentity . Text.Parsec.runParserT (fiddleUnit <* eof) () sourceName . stripTrailingComments =<< tokenize sourceName txt