diff options
-rw-r--r-- | src/Language/Fiddle/Ast.hs | 151 | ||||
-rw-r--r-- | src/Language/Fiddle/Compiler.hs | 8 | ||||
-rw-r--r-- | src/Language/Fiddle/Parser.hs | 264 | ||||
-rw-r--r-- | src/Language/Fiddle/Tokenizer.hs | 31 | ||||
-rw-r--r-- | src/Language/Fiddle/Types.hs | 4 |
5 files changed, 432 insertions, 26 deletions
diff --git a/src/Language/Fiddle/Ast.hs b/src/Language/Fiddle/Ast.hs new file mode 100644 index 0000000..23fb05f --- /dev/null +++ b/src/Language/Fiddle/Ast.hs @@ -0,0 +1,151 @@ +module Language.Fiddle.Ast where + +import Data.Text (Text) + +-- Stage of compilation. Parts of the AST maybe un unavailable with other stages +-- as compilation simplifies the AST. +data Stage = Stage1 | Stage2 | Stage3 + +-- Just an identifier. +data Identifier a = Identifier !Text a + +-- Expression. +data Expression stage a where + -- Just a string. Parsing the number comes in stage2. + LitNum :: Text -> a -> Expression 'Stage1 a + RealNum :: Integer -> a -> Expression 'Stage2 a + Var :: Identifier a -> a -> Expression stage a + +-- Root of the parse tree. Just contains a list of declarations. +data FiddleUnit (stage :: Stage) (f :: * -> *) a where + FiddleUnit :: [FiddleDecl stage f a] -> a -> FiddleUnit stage f a + +-- Top-level declarations. +data FiddleDecl (stage :: Stage) (f :: * -> *) a where + {- + - An option is a key/value pair. + - option <ident> <ident>; + -} + OptionDecl :: Identifier a -> Identifier a -> a -> FiddleDecl stage f a + {- Package Statement. Package Name, Package body -} + PackageDecl :: + Identifier a -> + f (PackageBody stage f a) -> + a -> + FiddleDecl stage f a + {- location <identifier> = <expr>. -} + LocationDecl :: + Identifier a -> + Expression stage a -> + a -> + FiddleDecl stage f a + {- bits <identifier> : <type> -} + BitsDecl :: + Identifier a -> + BitType stage f a -> + a -> + FiddleDecl stage f a + {- objtype <identifier> : <type> -} + ObjTypeDecl :: + Identifier a -> + f (ObjTypeBody stage f a) -> + a -> + FiddleDecl stage f a + {- object <ident> at <expr> : <type> -} + ObjectDecl :: + Identifier a -> + Expression stage a -> + ObjType stage f a -> + a -> + FiddleDecl stage f a + +data ObjType stage f a where + -- { <body> } + -- Anonymous types are only allowed in stage1. Stage2 should have them be + -- de-anonymized. + AnonymousObjType :: f (ObjTypeBody 'Stage1 f a) -> a -> ObjType 'Stage1 f a + -- <type>[<expr>] + ArrayObjType :: ObjType stage f a -> Expression stage a -> a -> ObjType stage f a + -- <identifier> + ReferencedObjType :: Identifier a -> a -> ObjType stage f a + +data ObjTypeBody (stage :: Stage) (f :: * -> *) a where + ObjTypeBody :: [ObjTypeDecl stage f a] -> a -> ObjTypeBody stage f a + +data ObjTypeDecl stage f a where + {- assert_pos(<expr>) -} + AssertPosStatement :: Expression stage a -> a -> ObjTypeDecl stage f a + {- reg <ident>(<expr>) : <regtype> -} + RegisterDecl :: + Maybe (Modifier a) -> + Maybe (Identifier a) -> + Expression stage a -> + Maybe (RegisterBody stage f a) -> + a -> + ObjTypeDecl stage f a + +data Modifier a where + ModifierKeyword :: ModifierKeyword -> a -> Modifier a + +data ModifierKeyword = Rw | Ro | Wo + +data DeferredRegisterBody stage f a where + DeferredRegisterBody :: + [RegisterBitsDecl stage f a] -> + a -> + DeferredRegisterBody stage f a + +data RegisterBody stage f a where + RegisterBody :: f (DeferredRegisterBody stage f a) -> a -> RegisterBody stage f a + +data RegisterBitsDecl stage f a where + -- reserved(<expr>) + ReservedBits :: Expression stage a -> a -> RegisterBitsDecl stage f a + -- <modifer> <ident> : <type> + DefinedBits :: + Maybe (Modifier a) -> + Identifier a -> + RegisterBitsTypeRef stage f a -> + a -> + RegisterBitsDecl stage f a + +data RegisterBitsTypeRef stage f a where + -- <type>[<expr>] + RegisterBitsArray :: + RegisterBitsTypeRef stage f a -> + Expression stage a -> + a -> + RegisterBitsTypeRef stage f a + {- Reference to a type. -} + RegisterBitsReference :: Identifier a -> a -> RegisterBitsTypeRef stage f a + {- enum(<expr>) { <body> } + Anonymous types are only allowed in stage1. + Stage2 should de-anonymize these type. -} + RegisterBitsAnonymousType :: + AnonymousBitsType f a -> + a -> + RegisterBitsTypeRef 'Stage1 f a + +data AnonymousBitsType f a where + -- enum(<expr>) { <body> } + AnonymousEnumBody :: Expression 'Stage1 a -> f (EnumBody stage f a) -> a -> AnonymousBitsType f a + +data BitType (stage :: Stage) (f :: * -> *) a where + -- enum(<expr>) { <body> } + EnumBitType :: Expression stage a -> f (EnumBody stage f a) -> a -> BitType stage f a + -- (<expr>) + RawBits :: Expression stage a -> a -> BitType stage f a + +data EnumBody (stage :: Stage) (f :: * -> *) a where + -- <decl>, + EnumBody :: [EnumConstantDecl stage a] -> a -> EnumBody stage f a + +data EnumConstantDecl stage a where + -- <ident> = <expr> + EnumConstantDecl :: Identifier a -> Expression stage a -> a -> EnumConstantDecl stage a + -- reserved = <expr> + EnumConstantReserved :: Expression stage a -> a -> EnumConstantDecl stage a + +data PackageBody (stage :: Stage) (f :: * -> *) a where + {- The body of a package -} + PackageBody :: [FiddleDecl stage f a] -> a -> PackageBody stage f a diff --git a/src/Language/Fiddle/Compiler.hs b/src/Language/Fiddle/Compiler.hs new file mode 100644 index 0000000..9bc9eb9 --- /dev/null +++ b/src/Language/Fiddle/Compiler.hs @@ -0,0 +1,8 @@ +module Language.Fiddle.Compiler where + +import Language.Fiddle.Ast + +-- Converts a Stage1 AST to a Stage2 AST. +compileStage2 :: + FiddleUnit 'Stage1 Identity Metadata -> FiddleUnit 'Stage2 Identity Metadata +compileStage2 = undefined diff --git a/src/Language/Fiddle/Parser.hs b/src/Language/Fiddle/Parser.hs index 3a44b31..16489c1 100644 --- a/src/Language/Fiddle/Parser.hs +++ b/src/Language/Fiddle/Parser.hs @@ -1,21 +1,259 @@ +{-# LANGUAGE LambdaCase #-} {-# LANGUAGE ScopedTypeVariables #-} -module Language.Fiddle.Parser where +module Language.Fiddle.Parser + ( fiddleUnit, + parseFiddleText, + ) +where -import Language.Fiddle.Types import Data.Functor.Identity -import Data.Text -import Text.Parsec +import Data.Text (Text) +import qualified Data.Text +import Language.Fiddle.Ast +import Language.Fiddle.Tokenizer +import Language.Fiddle.Types +import Text.Parsec hiding (token) +import qualified Text.Parsec + +type F = Either ParseError + +type S = [Token SourceSpan] + +type P = ParsecT S () Identity + +type Pa (a :: Stage -> (* -> *) -> * -> *) = P (a 'Stage1 F (Commented SourceSpan)) + +fiddleUnit :: Pa FiddleUnit +fiddleUnit = + withMeta $ + FiddleUnit <$> many (fiddleDecl <* tok TokSemi) + +fiddleDecl :: Pa FiddleDecl +fiddleDecl = + withMeta $ + choice + [ tok KWOption >> OptionDecl <$> ident <*> ident, + tok KWPackage >> PackageDecl + <$> ident + <*> defer body packageBody, + tok KWLocation >> LocationDecl <$> ident <*> (tok TokEq >> expression), + tok KWBits >> BitsDecl <$> ident <*> (tok TokColon >> bitType), + tok KWObjtype + >> ObjTypeDecl <$> ident <*> (tok TokColon >> defer body objTypeBody), + tok KWObject + >> ObjectDecl + <$> ident + <*> (tok KWAt *> expression) + <*> (tok TokColon *> objType) + ] + +objType :: Pa ObjType +objType = recur objType' + where + recur b = do + recur + ( try $ + withMeta $ do + t <- b + e <- tok TokLBrace *> expression <* tok TokRBrace + return (ArrayObjType t e) + ) + <|> b + + objType' = + withMeta $ + (ReferencedObjType <$> ident) + <|> (AnonymousObjType <$> defer body objTypeBody) + +exprInParen :: P (Expression Stage1 (Commented SourceSpan)) +exprInParen = tok TokLParen *> expression <* tok TokRParen + +objTypeBody :: Pa ObjTypeBody +objTypeBody = + withMeta $ + withinBody $ + ObjTypeBody <$> many (objTypeDecl <* tok TokSemi) + +objTypeDecl :: Pa ObjTypeDecl +objTypeDecl = + withMeta $ + ( do + tok KWAssertPos + AssertPosStatement <$> exprInParen + ) + <|> ( do + tok KWReg + RegisterDecl + <$> optionMaybe modifier + <*> optionMaybe ident + <*> exprInParen + <*> optionMaybe (tok TokColon *> registerBody) + ) + +modifier :: P (Modifier (Commented SourceSpan)) +modifier = + withMeta $ + ModifierKeyword + <$> choice + [ tok KWRo >> return Ro, + tok KWRw >> return Rw, + tok KWWo >> return Wo + ] + +registerBody :: Pa RegisterBody +registerBody = withMeta $ RegisterBody <$> defer body deferredRegisterBody + +deferredRegisterBody :: Pa DeferredRegisterBody +deferredRegisterBody = + withMeta $ + withinBody $ + DeferredRegisterBody <$> many (registerBitsDecl <* tok TokSemi) -newtype FiddleUnit a = FiddleUnit a +registerBitsDecl :: Pa RegisterBitsDecl +registerBitsDecl = + withMeta $ + ( do + tok KWReserved >> ReservedBits <$> exprInParen + ) + <|> ( DefinedBits <$> optionMaybe modifier + <*> ident + <*> registerBitsTypeRef + ) -parseUnit :: ParsecT s u m (FiddleUnit Metadata) -parseUnit = undefined +registerBitsTypeRef :: Pa RegisterBitsTypeRef +registerBitsTypeRef = recur typeRef + where + recur b = do + recur + ( try $ + withMeta $ do + t <- b + e <- tok TokLBrace *> expression <* tok TokRBrace + return (RegisterBitsArray t e) + ) + <|> b + + typeRef = + withMeta $ + (tok KWEnum >> RegisterBitsAnonymousType <$> anonymousBitsType) + <|> (RegisterBitsReference <$> ident) + +anonymousBitsType :: P (AnonymousBitsType F (Commented SourceSpan)) +anonymousBitsType = withMeta $ do + tok KWEnum + AnonymousEnumBody <$> exprInParen <*> defer body enumBody + +bitType :: Pa BitType +bitType = withMeta $ rawBits <|> enumType + where + rawBits = RawBits <$> (tok TokLParen *> expression <* tok TokRParen) + enumType = do + tok KWEnum + expr <- exprInParen + EnumBitType expr <$> defer body enumBody + +enumBody :: Pa EnumBody +enumBody = + withMeta $ + withinBody $ + EnumBody <$> many enumConstantDecl + +enumConstantDecl :: P (EnumConstantDecl Stage1 (Commented SourceSpan)) +enumConstantDecl = + withMeta $ + (tok KWReserved >> EnumConstantReserved <$> exprInParen) + <|> (EnumConstantDecl <$> ident <*> (tok TokEq >> expression)) + +expression :: P (Expression 'Stage1 (Commented SourceSpan)) +expression = withMeta $ + token $ \case + (TokLitNum num) -> Just (LitNum num) + (TokIdent i) -> Just $ + \(Commented cs s) -> Var (Identifier i (Commented [] s)) (Commented cs s) + _ -> Nothing + +body :: P [Token SourceSpan] +body = do + t0 <- tok TokLBrace + ret <- + concat + <$> manyTill + ( body <|> fmap (: []) anyToken + ) + (lookAhead $ tok TokRBrace) + return $ t0 : ret + +-- Parses something within braces. +withinBody :: P a -> P a +withinBody p = tok TokLBrace *> p <* tok TokRBrace + +-- A deferred parsing takes a part of a text file (such as a body) and returns a +-- deferred computation for parsing that section. +-- +-- This is useful because it allows for parse errors to be detected in multiple +-- locations. This is because for things like bodies (stuff inside { ... }), we +-- can parse the stuff inside the body as it's own, separate parsing. +defer :: P [Token SourceSpan] -> P b -> P (F b) +defer p0 pb = do + contents <- p0 + sourcePos <- getPosition -parseText :: String -> Text -> Either String (FiddleUnit Metadata) -parseText sourceName txt = runIdentity $ do - res <- Text.Parsec.runParserT parseUnit () sourceName [] return $ - case res of - Left pe -> Left (show pe) - Right a -> Right a + Text.Parsec.runParser + ( do + setPosition sourcePos + pb <* eof + ) + () + (sourceName sourcePos) + contents + +packageBody :: Pa PackageBody +packageBody = + withMeta $ + withinBody $ + PackageBody <$> many (fiddleDecl <* tok TokSemi) + +ident :: P (Identifier (Commented SourceSpan)) +ident = + withMeta $ + token $ \case + (TokIdent id) -> Just (Identifier id) + _ -> Nothing + +-- Takes a some parsable thing p and automatically parses the comments before +-- and after and sets the positions and adds it to the annotation. +withMeta :: P (Commented SourceSpan -> b) -> P b +withMeta p = try $ do + comments <- many comment + (Token _ (SourceSpan start _)) <- lookAhead anyToken + fn <- p + (Token _ (SourceSpan end _)) <- lookAhead anyToken + + return $ fn (Commented comments (SourceSpan start end)) + where + comment :: P Comment + comment = + token $ \case + (TokComment c) -> Just (NormalComment c) + (TokDocComment c) -> Just (DocComment c) + _ -> Nothing + +token :: (T -> Maybe a) -> ParsecT S u Identity a +token fn = + Text.Parsec.token + (\(Token t _) -> show t) + (\(Token _ (SourceSpan s1 _)) -> s1) + (\(Token t _) -> fn t) + +tok :: T -> P (Token SourceSpan) +tok t' = + Text.Parsec.token + (\(Token t _) -> show t) + (\(Token _ (SourceSpan s1 _)) -> s1) + (\tok@(Token t _) -> if t == t' then Just tok else Nothing) + +parseFiddleText :: String -> Text -> F (FiddleUnit 'Stage1 F (Commented SourceSpan)) +parseFiddleText sourceName txt = runIdentity $ do + Text.Parsec.runParserT fiddleUnit () sourceName [] diff --git a/src/Language/Fiddle/Tokenizer.hs b/src/Language/Fiddle/Tokenizer.hs index d3239fd..ec41042 100644 --- a/src/Language/Fiddle/Tokenizer.hs +++ b/src/Language/Fiddle/Tokenizer.hs @@ -1,29 +1,34 @@ {-# LANGUAGE DeriveFunctor #-} +{-# LANGUAGE OverloadedStrings #-} + module Language.Fiddle.Tokenizer where import Data.Char (isDigit) import Data.Text (Text) +import qualified Data.Text import Language.Fiddle.Types import Text.Parsec import qualified Text.Parsec data T = KWAssertPos - | Ident !String + | TokIdent !Text | KWAt - | KWBittype + | KWBits | KWEnum - | CommentTok !String - | DocCommentTok !String + | TokComment !Text + | TokDocComment !Text | KWLocation | KWObject | KWObjtype | KWOption | KWPackage | KWReg + | KWReserved | KWRo | KWWo - | LitNum !String + | KWRw + | TokLitNum !Text | TokColon | TokComma | TokEq @@ -48,10 +53,11 @@ parseToken = spaces *> tok parseToken' <* spaces Token t . SourceSpan p1 <$> getPosition + parseAlNumTok :: Text -> T parseAlNumTok str = case str of "at" -> KWAt - "bittype" -> KWBittype + "bits" -> KWBits "enum" -> KWEnum "location" -> KWLocation "object" -> KWObject @@ -61,19 +67,22 @@ parseToken = spaces *> tok parseToken' <* spaces "reg" -> KWReg "ro" -> KWRo "wo" -> KWWo - (h : _) | isDigit h -> LitNum str - ident -> Ident ident + "rw" -> KWRw + "reserved" -> KWReserved + "assert_pos" -> KWAssertPos + (Data.Text.head -> h) | isDigit h -> TokLitNum str + ident -> TokIdent ident parseComment = try ( do string "//" - CommentTok <$> manyTill anyChar (char '\n') + TokComment . Data.Text.pack <$> manyTill anyChar (char '\n') ) <|> try ( do string "/**" - DocCommentTok <$> manyTill anyChar (try $ string "*/") + TokDocComment . Data.Text.pack <$> manyTill anyChar (try $ string "*/") ) parseSymbol = @@ -93,7 +102,7 @@ parseToken = spaces *> tok parseToken' <* spaces a $> b = a >> return b parseToken' = - fmap parseAlNumTok (many1 (alphaNum <|> char '_')) + fmap (parseAlNumTok . Data.Text.pack) (many1 (alphaNum <|> char '_')) <|> parseComment <|> parseSymbol diff --git a/src/Language/Fiddle/Types.hs b/src/Language/Fiddle/Types.hs index c83bef2..b712c7a 100644 --- a/src/Language/Fiddle/Types.hs +++ b/src/Language/Fiddle/Types.hs @@ -3,7 +3,7 @@ module Language.Fiddle.Types where import Text.Parsec (SourcePos) import Data.Text (Text) -newtype Comment = Comment Text +data Comment = NormalComment Text | DocComment Text data SourceSpan = SourceSpan { sourceStart :: !SourcePos, @@ -11,4 +11,4 @@ data SourceSpan = SourceSpan } deriving (Eq, Ord, Show) -data Metadata = Metadata !SourceSpan !Comment +data Commented a = Commented ![Comment] !a |