{-# LANGUAGE LambdaCase #-} {-# LANGUAGE ScopedTypeVariables #-} module Language.Fiddle.Parser ( fiddleUnit, parseFiddleText, ) where import Control.Monad (void) import Data.Functor.Identity import Data.Kind (Type) import Data.List.NonEmpty (NonEmpty (..)) import Data.Text (Text) import qualified Data.Text import Debug.Trace import Language.Fiddle.Ast import Language.Fiddle.Tokenizer import Language.Fiddle.Types import Text.Parsec hiding (token) import qualified Text.Parsec import Text.Printf type F = Either ParseError type S = [Token SourceSpan] type P = ParsecT S () Identity type A = Commented SourceSpan type Pa (a :: Stage -> (Type -> Type) -> Type -> Type) = P (a 'Stage1 F (Commented SourceSpan)) type PaS (a :: (Type -> Type) -> Type -> Type) = P (a F (Commented SourceSpan)) comment :: P Comment comment = token $ \case (TokComment c) -> Just (NormalComment c) (TokDocComment c) -> Just (DocComment c) _ -> Nothing isComment :: Token s -> Bool isComment (Token t _) = case t of (TokComment _) -> True (TokDocComment _) -> True _ -> False -- Removes trailing comments from a list of tokens. Comments that don't preceed -- an actual language token have minimal semantic value and are thus discarded. stripTrailingComments :: [Token s] -> [Token s] stripTrailingComments = reverse . dropWhile isComment . reverse directed :: Pa t -> PaS (Directed (t 'Stage1)) directed subparser = withMeta $ do Directed <$> many directive <*> subparser directive :: PaS Directive directive = withMeta $ Directive <$> defer directiveBodyTokens directiveBody directiveBody :: PaS DirectiveBody directiveBody = withMeta $ do DirectiveBody <$> many (directiveElement <* (void (tok TokComma) <|> eof)) directiveElement :: PaS DirectiveElement directiveElement = withMeta $ do identifier1 <- nextText choice [ do tok TokColon let backend = identifier1 key <- nextText choice [ do tok TokEq DirectiveElementKeyValue (Just backend) key <$> directiveExpression, do return (DirectiveElementKey (Just backend) key) ], do tok TokEq let key = identifier1 DirectiveElementKeyValue Nothing key <$> directiveExpression, return $ DirectiveElementKey Nothing identifier1 ] nextText :: PaS Identifier nextText = withMeta $ Identifier <$> token textOf directiveExpression :: PaS DirectiveExpression directiveExpression = withMeta $ do choice [ do token $ \case (TokString str) -> Just $ DirectiveString str (TokLitNum num) -> Just $ DirectiveNumber num _ -> Nothing ] fiddleUnit :: Pa FiddleUnit fiddleUnit = do withMeta ( FiddleUnit <$> many1 (directed fiddleDecl <* tok TokSemi) ) <* many comment stringToken :: P Text stringToken = token ( \case (TokString str) -> Just str _ -> Nothing ) importList :: PaS ImportList importList = withMeta $ do tok TokLParen ImportList <$> many (ident <* (tok TokComma <|> lookAhead (tok TokRParen))) <* tok TokRParen importStatement :: PaS ImportStatement importStatement = withMeta $ ImportStatement <$> stringToken <*> optionMaybe importList fiddleDecl :: Pa FiddleDecl fiddleDecl = do withMeta $ do t <- tokenType <$> anyToken case t of KWOption -> OptionDecl <$> nextText <*> nextText KWPackage -> PackageDecl <$> name <*> defer body packageBody KWUsing -> UsingDecl <$> name KWLocation -> LocationDecl <$> ident <*> (tok TokEq >> expression) KWBits -> BitsDecl <$> ident <*> (tok TokColon >> bitType) KWImport -> ImportDecl <$> importStatement KWType -> ObjTypeDecl <$> ident <*> ( do tok TokColon bt <- bodyType defer body (objTypeBody bt) ) KWInstance -> ObjectDecl <$> ident <*> (tok KWAt *> expression) <*> (tok TokColon *> objType) _ -> fail $ printf "Unexpected token %s. Expected top-level declaration." (show t) -- choice -- [ tok KWOption >> OptionDecl <$> ident <*> ident, -- tok KWPackage >> PackageDecl -- <$> ident -- <*> defer body packageBody, -- tok KWLocation >> LocationDecl <$> ident <*> (tok TokEq >> expression), -- tok KWBits >> BitsDecl <$> ident <*> (tok TokColon >> bitType), -- tok KWObjtype -- >> ObjTypeDecl <$> ident <*> (tok TokColon >> defer body objTypeBody), -- tok KWObject -- >> ObjectDecl -- <$> ident -- <*> (tok KWAt *> expression) -- <*> (tok TokColon *> objType) -- ] objType :: Pa ObjType objType = do base <- withMeta baseObj recur' <- recur return $ recur' base where recur :: P (ObjType Stage1 F A -> ObjType Stage1 F A) recur = ( do withMeta $ do expr <- tok TokLBracket *> expression <* tok TokRBracket recur' <- recur return (\met base -> recur' (ArrayObjType base expr met)) ) <|> return id baseObj :: P (A -> ObjType Stage1 F A) baseObj = (ReferencedObjType <$> name) <|> ( do t <- bodyType AnonymousObjType <$> defer body (objTypeBody t) ) exprInParen :: Pa Expression exprInParen = tok TokLParen *> expression <* tok TokRParen objTypeBody :: BodyType F (Commented SourceSpan) -> Pa ObjTypeBody objTypeBody bt = withMeta $ ObjTypeBody bt <$> many (directed objTypeDecl <* tok TokSemi) objTypeDecl :: Pa ObjTypeDecl objTypeDecl = withMeta $ ( do tok KWAssertPos AssertPosStatement <$> exprInParen ) <|> ( do tok KWReserved ReservedDecl <$> exprInParen ) <|> ( do bt <- bodyType TypeSubStructure <$> defer body (objTypeBody bt) <*> optionMaybe ident ) <|> ( do mod <- optionMaybe modifier tok KWReg RegisterDecl mod <$> optionMaybe ident <*> exprInParen <*> optionMaybe (tok TokColon *> registerBody) ) modifier :: PaS Modifier modifier = withMeta $ ModifierKeyword <$> choice [ tok KWRo >> return Ro, tok KWRw >> return Rw, tok KWWo >> return Wo ] bitBodyType :: PaS BodyType bitBodyType = withMeta $ (tok KWStruct >> return Struct) <|> (tok KWUnion >> return Union) bodyType :: PaS BodyType bodyType = withMeta $ (tok KWStruct >> return Struct) <|> (tok KWUnion >> return Union) registerBody :: Pa RegisterBody registerBody = withMeta $ RegisterBody <$> bitBodyType <*> defer body deferredRegisterBody deferredRegisterBody :: Pa DeferredRegisterBody deferredRegisterBody = withMeta $ DeferredRegisterBody <$> many (directed registerBitsDecl <* tok TokSemi) registerBitsDecl :: Pa RegisterBitsDecl registerBitsDecl = withMeta $ ( do tok KWReserved >> ReservedBits <$> exprInParen ) <|> (BitsSubStructure <$> registerBody <*> optionMaybe ident) <|> ( DefinedBits <$> optionMaybe modifier <*> ident <*> (tok TokColon >> registerBitsTypeRef) ) registerBitsTypeRef :: Pa RegisterBitsTypeRef registerBitsTypeRef = do base <- baseTypeRef recur' <- recur return (recur' base) where recur :: P (RegisterBitsTypeRef Stage1 F A -> RegisterBitsTypeRef Stage1 F A) recur = ( do withMeta $ do expr <- tok TokLBracket *> expression <* tok TokRBracket recur' <- recur return (\met base -> recur' (RegisterBitsArray base expr met)) ) <|> return id baseTypeRef = withMeta $ (RegisterBitsJustBits <$> exprInParen) <|> (RegisterBitsAnonymousType <$> anonymousBitsType) <|> (RegisterBitsReference <$> name) anonymousBitsType :: Pa AnonymousBitsType anonymousBitsType = withMeta $ do tok KWEnum AnonymousEnumBody <$> exprInParen <*> defer body enumBody bitType :: Pa BitType bitType = withMeta $ rawBits <|> enumType where rawBits = RawBits <$> (tok TokLParen *> expression <* tok TokRParen) enumType = do tok KWEnum expr <- exprInParen EnumBitType expr <$> defer body enumBody enumBody :: Pa EnumBody enumBody = withMeta $ EnumBody <$> many (directed enumConstantDecl <* tok TokComma) enumConstantDecl :: Pa EnumConstantDecl enumConstantDecl = withMeta $ (tok KWReserved >> EnumConstantReserved <$> (tok TokEq >> expression)) <|> (EnumConstantDecl <$> ident <*> (tok TokEq >> expression)) expression :: Pa Expression expression = withMeta $ token $ \case (TokLitNum num) -> Just (LitNum num) (TokIdent i) -> Just $ \(Commented cs s) -> Var (Identifier i (Commented [] s)) (Commented cs s) _ -> Nothing body :: P [Token SourceSpan] body = do (_, b, _) <- body' return b directiveBodyTokens :: P [Token SourceSpan] directiveBodyTokens = do tokKeepComment TokDirectiveStart ret <- concat <$> manyTill ((: []) <$> anyToken) (lookAhead $ tokKeepComment TokDirectiveEnd) tokKeepComment TokDirectiveEnd return ret body' :: P (Token SourceSpan, [Token SourceSpan], Token SourceSpan) body' = do l <- tokKeepComment TokLBrace ret <- concat <$> manyTill ( ((\(b0, b1, b2) -> [b0] ++ b1 ++ [b2]) <$> body') <|> fmap (: []) anyToken ) (lookAhead $ tokKeepComment TokRBrace) r <- tokKeepComment TokRBrace next <- lookAhead anyToken return (l, stripTrailingComments ret, r) -- A deferred parsing takes a part of a text file (such as a body) and returns a -- deferred computation for parsing that section. -- -- This is useful because it allows for parse errors to be detected in multiple -- locations. This is because for things like bodies (stuff inside { ... }), we -- can parse the stuff inside the body as it's own, separate parsing. defer :: P [Token SourceSpan] -> P b -> P (F b) defer p0 pb = do sourcePos <- getPosition Text.Parsec.runParser ( do setPosition sourcePos pb <* eof ) () (sourceName sourcePos) <$> p0 packageBody :: Pa PackageBody packageBody = withMeta $ PackageBody <$> many ( directed $ fiddleDecl <* ( tok TokSemi <|> fail "Expected ';'" ) ) printNext :: P () printNext = do t <- lookAhead anyToken traceM $ "NextToken: " ++ show t return () ident :: PaS Identifier ident = withMeta $ token $ \case (TokIdent id) -> Just (Identifier id) _ -> Nothing name :: PaS Name name = withMeta $ do i <- ident is <- many $ do tok TokDot ident return $ Name (i :| is) -- Takes a some parsable thing p and automatically parses the comments before -- and after and sets the positions and adds it to the annotation. withMeta :: P (Commented SourceSpan -> b) -> P b withMeta p = do comments <- many comment start <- getPosition fn <- p end <- getPosition return $ fn (Commented comments (SourceSpan start end)) token :: (T -> Maybe a) -> ParsecT S u Identity a token fn = Text.Parsec.token (\(Token t _) -> show t) (\(Token _ (SourceSpan s1 _)) -> s1) (\(Token t _) -> fn t) tokKeepComment :: T -> P (Token SourceSpan) tokKeepComment t' = do Text.Parsec.token (\(Token t _) -> show t) (\(Token _ (SourceSpan s1 _)) -> s1) (\tok@(Token t _) -> if t == t' then Just tok else Nothing) tok :: T -> P (Token SourceSpan) tok t' = do many comment Text.Parsec.token (\(Token t _) -> show t) (\(Token _ (SourceSpan s1 _)) -> s1) (\tok@(Token t _) -> if t == t' then Just tok else Nothing) parseFiddleText :: String -> Text -> F (FiddleUnit 'Stage1 F (Commented SourceSpan)) parseFiddleText sourceName txt = runIdentity . Text.Parsec.runParserT (fiddleUnit <* eof) () sourceName . stripTrailingComments =<< tokenize sourceName txt