diff options
Diffstat (limited to 'src/Language/Fiddle/Parser.hs')
-rw-r--r-- | src/Language/Fiddle/Parser.hs | 264 |
1 files changed, 251 insertions, 13 deletions
diff --git a/src/Language/Fiddle/Parser.hs b/src/Language/Fiddle/Parser.hs index 3a44b31..16489c1 100644 --- a/src/Language/Fiddle/Parser.hs +++ b/src/Language/Fiddle/Parser.hs @@ -1,21 +1,259 @@ +{-# LANGUAGE LambdaCase #-} {-# LANGUAGE ScopedTypeVariables #-} -module Language.Fiddle.Parser where +module Language.Fiddle.Parser + ( fiddleUnit, + parseFiddleText, + ) +where -import Language.Fiddle.Types import Data.Functor.Identity -import Data.Text -import Text.Parsec +import Data.Text (Text) +import qualified Data.Text +import Language.Fiddle.Ast +import Language.Fiddle.Tokenizer +import Language.Fiddle.Types +import Text.Parsec hiding (token) +import qualified Text.Parsec + +type F = Either ParseError + +type S = [Token SourceSpan] + +type P = ParsecT S () Identity + +type Pa (a :: Stage -> (* -> *) -> * -> *) = P (a 'Stage1 F (Commented SourceSpan)) + +fiddleUnit :: Pa FiddleUnit +fiddleUnit = + withMeta $ + FiddleUnit <$> many (fiddleDecl <* tok TokSemi) + +fiddleDecl :: Pa FiddleDecl +fiddleDecl = + withMeta $ + choice + [ tok KWOption >> OptionDecl <$> ident <*> ident, + tok KWPackage >> PackageDecl + <$> ident + <*> defer body packageBody, + tok KWLocation >> LocationDecl <$> ident <*> (tok TokEq >> expression), + tok KWBits >> BitsDecl <$> ident <*> (tok TokColon >> bitType), + tok KWObjtype + >> ObjTypeDecl <$> ident <*> (tok TokColon >> defer body objTypeBody), + tok KWObject + >> ObjectDecl + <$> ident + <*> (tok KWAt *> expression) + <*> (tok TokColon *> objType) + ] + +objType :: Pa ObjType +objType = recur objType' + where + recur b = do + recur + ( try $ + withMeta $ do + t <- b + e <- tok TokLBrace *> expression <* tok TokRBrace + return (ArrayObjType t e) + ) + <|> b + + objType' = + withMeta $ + (ReferencedObjType <$> ident) + <|> (AnonymousObjType <$> defer body objTypeBody) + +exprInParen :: P (Expression Stage1 (Commented SourceSpan)) +exprInParen = tok TokLParen *> expression <* tok TokRParen + +objTypeBody :: Pa ObjTypeBody +objTypeBody = + withMeta $ + withinBody $ + ObjTypeBody <$> many (objTypeDecl <* tok TokSemi) + +objTypeDecl :: Pa ObjTypeDecl +objTypeDecl = + withMeta $ + ( do + tok KWAssertPos + AssertPosStatement <$> exprInParen + ) + <|> ( do + tok KWReg + RegisterDecl + <$> optionMaybe modifier + <*> optionMaybe ident + <*> exprInParen + <*> optionMaybe (tok TokColon *> registerBody) + ) + +modifier :: P (Modifier (Commented SourceSpan)) +modifier = + withMeta $ + ModifierKeyword + <$> choice + [ tok KWRo >> return Ro, + tok KWRw >> return Rw, + tok KWWo >> return Wo + ] + +registerBody :: Pa RegisterBody +registerBody = withMeta $ RegisterBody <$> defer body deferredRegisterBody + +deferredRegisterBody :: Pa DeferredRegisterBody +deferredRegisterBody = + withMeta $ + withinBody $ + DeferredRegisterBody <$> many (registerBitsDecl <* tok TokSemi) -newtype FiddleUnit a = FiddleUnit a +registerBitsDecl :: Pa RegisterBitsDecl +registerBitsDecl = + withMeta $ + ( do + tok KWReserved >> ReservedBits <$> exprInParen + ) + <|> ( DefinedBits <$> optionMaybe modifier + <*> ident + <*> registerBitsTypeRef + ) -parseUnit :: ParsecT s u m (FiddleUnit Metadata) -parseUnit = undefined +registerBitsTypeRef :: Pa RegisterBitsTypeRef +registerBitsTypeRef = recur typeRef + where + recur b = do + recur + ( try $ + withMeta $ do + t <- b + e <- tok TokLBrace *> expression <* tok TokRBrace + return (RegisterBitsArray t e) + ) + <|> b + + typeRef = + withMeta $ + (tok KWEnum >> RegisterBitsAnonymousType <$> anonymousBitsType) + <|> (RegisterBitsReference <$> ident) + +anonymousBitsType :: P (AnonymousBitsType F (Commented SourceSpan)) +anonymousBitsType = withMeta $ do + tok KWEnum + AnonymousEnumBody <$> exprInParen <*> defer body enumBody + +bitType :: Pa BitType +bitType = withMeta $ rawBits <|> enumType + where + rawBits = RawBits <$> (tok TokLParen *> expression <* tok TokRParen) + enumType = do + tok KWEnum + expr <- exprInParen + EnumBitType expr <$> defer body enumBody + +enumBody :: Pa EnumBody +enumBody = + withMeta $ + withinBody $ + EnumBody <$> many enumConstantDecl + +enumConstantDecl :: P (EnumConstantDecl Stage1 (Commented SourceSpan)) +enumConstantDecl = + withMeta $ + (tok KWReserved >> EnumConstantReserved <$> exprInParen) + <|> (EnumConstantDecl <$> ident <*> (tok TokEq >> expression)) + +expression :: P (Expression 'Stage1 (Commented SourceSpan)) +expression = withMeta $ + token $ \case + (TokLitNum num) -> Just (LitNum num) + (TokIdent i) -> Just $ + \(Commented cs s) -> Var (Identifier i (Commented [] s)) (Commented cs s) + _ -> Nothing + +body :: P [Token SourceSpan] +body = do + t0 <- tok TokLBrace + ret <- + concat + <$> manyTill + ( body <|> fmap (: []) anyToken + ) + (lookAhead $ tok TokRBrace) + return $ t0 : ret + +-- Parses something within braces. +withinBody :: P a -> P a +withinBody p = tok TokLBrace *> p <* tok TokRBrace + +-- A deferred parsing takes a part of a text file (such as a body) and returns a +-- deferred computation for parsing that section. +-- +-- This is useful because it allows for parse errors to be detected in multiple +-- locations. This is because for things like bodies (stuff inside { ... }), we +-- can parse the stuff inside the body as it's own, separate parsing. +defer :: P [Token SourceSpan] -> P b -> P (F b) +defer p0 pb = do + contents <- p0 + sourcePos <- getPosition -parseText :: String -> Text -> Either String (FiddleUnit Metadata) -parseText sourceName txt = runIdentity $ do - res <- Text.Parsec.runParserT parseUnit () sourceName [] return $ - case res of - Left pe -> Left (show pe) - Right a -> Right a + Text.Parsec.runParser + ( do + setPosition sourcePos + pb <* eof + ) + () + (sourceName sourcePos) + contents + +packageBody :: Pa PackageBody +packageBody = + withMeta $ + withinBody $ + PackageBody <$> many (fiddleDecl <* tok TokSemi) + +ident :: P (Identifier (Commented SourceSpan)) +ident = + withMeta $ + token $ \case + (TokIdent id) -> Just (Identifier id) + _ -> Nothing + +-- Takes a some parsable thing p and automatically parses the comments before +-- and after and sets the positions and adds it to the annotation. +withMeta :: P (Commented SourceSpan -> b) -> P b +withMeta p = try $ do + comments <- many comment + (Token _ (SourceSpan start _)) <- lookAhead anyToken + fn <- p + (Token _ (SourceSpan end _)) <- lookAhead anyToken + + return $ fn (Commented comments (SourceSpan start end)) + where + comment :: P Comment + comment = + token $ \case + (TokComment c) -> Just (NormalComment c) + (TokDocComment c) -> Just (DocComment c) + _ -> Nothing + +token :: (T -> Maybe a) -> ParsecT S u Identity a +token fn = + Text.Parsec.token + (\(Token t _) -> show t) + (\(Token _ (SourceSpan s1 _)) -> s1) + (\(Token t _) -> fn t) + +tok :: T -> P (Token SourceSpan) +tok t' = + Text.Parsec.token + (\(Token t _) -> show t) + (\(Token _ (SourceSpan s1 _)) -> s1) + (\tok@(Token t _) -> if t == t' then Just tok else Nothing) + +parseFiddleText :: String -> Text -> F (FiddleUnit 'Stage1 F (Commented SourceSpan)) +parseFiddleText sourceName txt = runIdentity $ do + Text.Parsec.runParserT fiddleUnit () sourceName [] |