{-# LANGUAGE LambdaCase #-} {-# LANGUAGE ScopedTypeVariables #-} module Language.Fiddle.Parser ( fiddleUnit, parseFiddleText, ) where import Data.Kind (Type) import Data.Functor.Identity import Data.Text (Text) import qualified Data.Text import Debug.Trace import Language.Fiddle.Ast import Language.Fiddle.Tokenizer import Language.Fiddle.Types import Text.Parsec hiding (token) import qualified Text.Parsec import Text.Printf type F = Either ParseError type S = [Token SourceSpan] type P = ParsecT S () Identity type A = Commented SourceSpan type Pa (a :: Stage -> (Type -> Type) -> Type -> Type) = P (a 'Stage1 F (Commented SourceSpan)) type PaS (a :: (Type -> Type) -> Type -> Type) = P (a F (Commented SourceSpan)) comment :: P Comment comment = token $ \case (TokComment c) -> Just (NormalComment c) (TokDocComment c) -> Just (DocComment c) _ -> Nothing isComment :: Token s -> Bool isComment (Token t _) = case t of (TokComment _) -> True (TokDocComment _) -> True _ -> False -- Removes trailing comments from a list of tokens. Comments that don't preceed -- an actual language token have minimal semantic value and are thus discarded. stripTrailingComments :: [Token s] -> [Token s] stripTrailingComments = reverse . dropWhile isComment . reverse fiddleUnit :: Pa FiddleUnit fiddleUnit = do withMeta ( FiddleUnit <$> many1 (fiddleDecl <* tok TokSemi) ) <* many comment fiddleDecl :: Pa FiddleDecl fiddleDecl = do withMeta $ do t <- tokenType <$> anyToken case t of KWOption -> OptionDecl <$> ident <*> ident KWPackage -> PackageDecl <$> ident <*> defer body packageBody KWLocation -> LocationDecl <$> ident <*> (tok TokEq >> expression) KWBits -> BitsDecl <$> ident <*> (tok TokColon >> bitType) KWObjtype -> ObjTypeDecl <$> ident <*> (tok TokColon >> defer body objTypeBody) KWObject -> ObjectDecl <$> ident <*> (tok KWAt *> expression) <*> (tok TokColon *> objType) _ -> fail $ printf "Unexpected token %s. Expected top-level declaration." (show t) -- choice -- [ tok KWOption >> OptionDecl <$> ident <*> ident, -- tok KWPackage >> PackageDecl -- <$> ident -- <*> defer body packageBody, -- tok KWLocation >> LocationDecl <$> ident <*> (tok TokEq >> expression), -- tok KWBits >> BitsDecl <$> ident <*> (tok TokColon >> bitType), -- tok KWObjtype -- >> ObjTypeDecl <$> ident <*> (tok TokColon >> defer body objTypeBody), -- tok KWObject -- >> ObjectDecl -- <$> ident -- <*> (tok KWAt *> expression) -- <*> (tok TokColon *> objType) -- ] objType :: Pa ObjType objType = do base <- withMeta baseObj recur' <- recur return $ recur' base where recur :: P (ObjType Stage1 F A -> ObjType Stage1 F A) recur = ( do withMeta $ do expr <- tok TokLBracket *> expression <* tok TokRBracket recur' <- recur return (\met base -> recur' (ArrayObjType base expr met)) ) <|> return id baseObj :: P (A -> ObjType Stage1 F A) baseObj = (ReferencedObjType <$> ident) <|> (AnonymousObjType <$> defer body objTypeBody) exprInParen :: Pa Expression exprInParen = tok TokLParen *> expression <* tok TokRParen objTypeBody :: Pa ObjTypeBody objTypeBody = withMeta $ ObjTypeBody <$> many (objTypeDecl <* tok TokSemi) objTypeDecl :: Pa ObjTypeDecl objTypeDecl = withMeta $ ( do tok KWAssertPos AssertPosStatement <$> exprInParen ) <|> ( do mod <- optionMaybe modifier tok KWReg RegisterDecl mod <$> optionMaybe ident <*> exprInParen <*> optionMaybe (tok TokColon *> registerBody) ) modifier :: PaS Modifier modifier = withMeta $ ModifierKeyword <$> choice [ tok KWRo >> return Ro, tok KWRw >> return Rw, tok KWWo >> return Wo ] registerBody :: Pa RegisterBody registerBody = withMeta $ RegisterBody <$> defer body deferredRegisterBody deferredRegisterBody :: Pa DeferredRegisterBody deferredRegisterBody = withMeta $ DeferredRegisterBody <$> many (registerBitsDecl <* tok TokSemi) registerBitsDecl :: Pa RegisterBitsDecl registerBitsDecl = withMeta $ ( do tok KWReserved >> ReservedBits <$> exprInParen ) <|> ( DefinedBits <$> optionMaybe modifier <*> ident <*> (tok TokColon >> registerBitsTypeRef) ) registerBitsTypeRef :: Pa RegisterBitsTypeRef registerBitsTypeRef = do base <- baseTypeRef recur' <- recur return (recur' base) where recur :: P (RegisterBitsTypeRef Stage1 F A -> RegisterBitsTypeRef Stage1 F A) recur = ( do withMeta $ do expr <- tok TokLBracket *> expression <* tok TokRBracket recur' <- recur return (\met base -> recur' (RegisterBitsArray base expr met)) ) <|> return id baseTypeRef = withMeta $ (RegisterBitsJustBits <$> exprInParen) <|> (RegisterBitsAnonymousType <$> anonymousBitsType) <|> (RegisterBitsReference <$> ident) anonymousBitsType :: Pa AnonymousBitsType anonymousBitsType = withMeta $ do tok KWEnum AnonymousEnumBody <$> exprInParen <*> defer body enumBody bitType :: Pa BitType bitType = withMeta $ rawBits <|> enumType where rawBits = RawBits <$> (tok TokLParen *> expression <* tok TokRParen) enumType = do tok KWEnum expr <- exprInParen EnumBitType expr <$> defer body enumBody enumBody :: Pa EnumBody enumBody = withMeta $ EnumBody <$> many (enumConstantDecl <* tok TokComma) enumConstantDecl :: Pa EnumConstantDecl enumConstantDecl = withMeta $ (tok KWReserved >> EnumConstantReserved <$> (tok TokEq >> expression)) <|> (EnumConstantDecl <$> ident <*> (tok TokEq >> expression)) expression :: Pa Expression expression = withMeta $ token $ \case (TokLitNum num) -> Just (LitNum num) (TokIdent i) -> Just $ \(Commented cs s) -> Var (Identifier i (Commented [] s)) (Commented cs s) _ -> Nothing body :: P [Token SourceSpan] body = do (_, b, _) <- body' return b body' :: P (Token SourceSpan, [Token SourceSpan], Token SourceSpan) body' = do l <- tokKeepComment TokLBrace ret <- concat <$> manyTill ( ((\(b0, b1, b2) -> [b0] ++ b1 ++ [b2]) <$> body') <|> fmap (: []) anyToken ) (lookAhead $ tokKeepComment TokRBrace) r <- tokKeepComment TokRBrace next <- lookAhead anyToken return (l, stripTrailingComments ret, r) -- A deferred parsing takes a part of a text file (such as a body) and returns a -- deferred computation for parsing that section. -- -- This is useful because it allows for parse errors to be detected in multiple -- locations. This is because for things like bodies (stuff inside { ... }), we -- can parse the stuff inside the body as it's own, separate parsing. defer :: P [Token SourceSpan] -> P b -> P (F b) defer p0 pb = do sourcePos <- getPosition Text.Parsec.runParser ( do setPosition sourcePos pb <* eof ) () (sourceName sourcePos) <$> p0 packageBody :: Pa PackageBody packageBody = withMeta $ PackageBody <$> many ( fiddleDecl <* ( tok TokSemi <|> fail "Expected ';'" ) ) printNext :: P () printNext = do t <- lookAhead anyToken traceM $ "NextToken: " ++ show t return () ident :: PaS Identifier ident = withMeta $ token $ \case (TokIdent id) -> Just (Identifier id) _ -> Nothing -- Takes a some parsable thing p and automatically parses the comments before -- and after and sets the positions and adds it to the annotation. withMeta :: P (Commented SourceSpan -> b) -> P b withMeta p = do comments <- many comment start <- getPosition fn <- p end <- getPosition return $ fn (Commented comments (SourceSpan start end)) token :: (T -> Maybe a) -> ParsecT S u Identity a token fn = Text.Parsec.token (\(Token t _) -> show t) (\(Token _ (SourceSpan s1 _)) -> s1) (\(Token t _) -> fn t) tokKeepComment :: T -> P (Token SourceSpan) tokKeepComment t' = do Text.Parsec.token (\(Token t _) -> show t) (\(Token _ (SourceSpan s1 _)) -> s1) (\tok@(Token t _) -> if t == t' then Just tok else Nothing) tok :: T -> P (Token SourceSpan) tok t' = do many comment Text.Parsec.token (\(Token t _) -> show t) (\(Token _ (SourceSpan s1 _)) -> s1) (\tok@(Token t _) -> if t == t' then Just tok else Nothing) parseFiddleText :: String -> Text -> F (FiddleUnit 'Stage1 F (Commented SourceSpan)) parseFiddleText sourceName txt = runIdentity . Text.Parsec.runParserT (fiddleUnit <* eof) () sourceName . stripTrailingComments =<< tokenize sourceName txt