summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJosh Rahm <joshuarahm@gmail.com>2022-12-17 22:38:48 -0700
committerJosh Rahm <joshuarahm@gmail.com>2022-12-17 22:39:05 -0700
commit01685ab88228fb602cb0e408d93560e76e1371a1 (patch)
tree861a5ce9fc874ff7440f6d855758fdb1d86d4ffe
parent47c776413ed4e11839ad6838575d0077ddd496a3 (diff)
downloadfiddle-01685ab88228fb602cb0e408d93560e76e1371a1.tar.gz
fiddle-01685ab88228fb602cb0e408d93560e76e1371a1.tar.bz2
fiddle-01685ab88228fb602cb0e408d93560e76e1371a1.zip
WIP: Basic parser implemented.
The parser is completely untested and probably broken, but it's probably pretty close becasue it does typecheck. This is a Work-in-progress.
-rw-r--r--src/Language/Fiddle/Ast.hs151
-rw-r--r--src/Language/Fiddle/Compiler.hs8
-rw-r--r--src/Language/Fiddle/Parser.hs264
-rw-r--r--src/Language/Fiddle/Tokenizer.hs31
-rw-r--r--src/Language/Fiddle/Types.hs4
5 files changed, 432 insertions, 26 deletions
diff --git a/src/Language/Fiddle/Ast.hs b/src/Language/Fiddle/Ast.hs
new file mode 100644
index 0000000..23fb05f
--- /dev/null
+++ b/src/Language/Fiddle/Ast.hs
@@ -0,0 +1,151 @@
+module Language.Fiddle.Ast where
+
+import Data.Text (Text)
+
+-- Stage of compilation. Parts of the AST maybe un unavailable with other stages
+-- as compilation simplifies the AST.
+data Stage = Stage1 | Stage2 | Stage3
+
+-- Just an identifier.
+data Identifier a = Identifier !Text a
+
+-- Expression.
+data Expression stage a where
+ -- Just a string. Parsing the number comes in stage2.
+ LitNum :: Text -> a -> Expression 'Stage1 a
+ RealNum :: Integer -> a -> Expression 'Stage2 a
+ Var :: Identifier a -> a -> Expression stage a
+
+-- Root of the parse tree. Just contains a list of declarations.
+data FiddleUnit (stage :: Stage) (f :: * -> *) a where
+ FiddleUnit :: [FiddleDecl stage f a] -> a -> FiddleUnit stage f a
+
+-- Top-level declarations.
+data FiddleDecl (stage :: Stage) (f :: * -> *) a where
+ {-
+ - An option is a key/value pair.
+ - option <ident> <ident>;
+ -}
+ OptionDecl :: Identifier a -> Identifier a -> a -> FiddleDecl stage f a
+ {- Package Statement. Package Name, Package body -}
+ PackageDecl ::
+ Identifier a ->
+ f (PackageBody stage f a) ->
+ a ->
+ FiddleDecl stage f a
+ {- location <identifier> = <expr>. -}
+ LocationDecl ::
+ Identifier a ->
+ Expression stage a ->
+ a ->
+ FiddleDecl stage f a
+ {- bits <identifier> : <type> -}
+ BitsDecl ::
+ Identifier a ->
+ BitType stage f a ->
+ a ->
+ FiddleDecl stage f a
+ {- objtype <identifier> : <type> -}
+ ObjTypeDecl ::
+ Identifier a ->
+ f (ObjTypeBody stage f a) ->
+ a ->
+ FiddleDecl stage f a
+ {- object <ident> at <expr> : <type> -}
+ ObjectDecl ::
+ Identifier a ->
+ Expression stage a ->
+ ObjType stage f a ->
+ a ->
+ FiddleDecl stage f a
+
+data ObjType stage f a where
+ -- { <body> }
+ -- Anonymous types are only allowed in stage1. Stage2 should have them be
+ -- de-anonymized.
+ AnonymousObjType :: f (ObjTypeBody 'Stage1 f a) -> a -> ObjType 'Stage1 f a
+ -- <type>[<expr>]
+ ArrayObjType :: ObjType stage f a -> Expression stage a -> a -> ObjType stage f a
+ -- <identifier>
+ ReferencedObjType :: Identifier a -> a -> ObjType stage f a
+
+data ObjTypeBody (stage :: Stage) (f :: * -> *) a where
+ ObjTypeBody :: [ObjTypeDecl stage f a] -> a -> ObjTypeBody stage f a
+
+data ObjTypeDecl stage f a where
+ {- assert_pos(<expr>) -}
+ AssertPosStatement :: Expression stage a -> a -> ObjTypeDecl stage f a
+ {- reg <ident>(<expr>) : <regtype> -}
+ RegisterDecl ::
+ Maybe (Modifier a) ->
+ Maybe (Identifier a) ->
+ Expression stage a ->
+ Maybe (RegisterBody stage f a) ->
+ a ->
+ ObjTypeDecl stage f a
+
+data Modifier a where
+ ModifierKeyword :: ModifierKeyword -> a -> Modifier a
+
+data ModifierKeyword = Rw | Ro | Wo
+
+data DeferredRegisterBody stage f a where
+ DeferredRegisterBody ::
+ [RegisterBitsDecl stage f a] ->
+ a ->
+ DeferredRegisterBody stage f a
+
+data RegisterBody stage f a where
+ RegisterBody :: f (DeferredRegisterBody stage f a) -> a -> RegisterBody stage f a
+
+data RegisterBitsDecl stage f a where
+ -- reserved(<expr>)
+ ReservedBits :: Expression stage a -> a -> RegisterBitsDecl stage f a
+ -- <modifer> <ident> : <type>
+ DefinedBits ::
+ Maybe (Modifier a) ->
+ Identifier a ->
+ RegisterBitsTypeRef stage f a ->
+ a ->
+ RegisterBitsDecl stage f a
+
+data RegisterBitsTypeRef stage f a where
+ -- <type>[<expr>]
+ RegisterBitsArray ::
+ RegisterBitsTypeRef stage f a ->
+ Expression stage a ->
+ a ->
+ RegisterBitsTypeRef stage f a
+ {- Reference to a type. -}
+ RegisterBitsReference :: Identifier a -> a -> RegisterBitsTypeRef stage f a
+ {- enum(<expr>) { <body> }
+ Anonymous types are only allowed in stage1.
+ Stage2 should de-anonymize these type. -}
+ RegisterBitsAnonymousType ::
+ AnonymousBitsType f a ->
+ a ->
+ RegisterBitsTypeRef 'Stage1 f a
+
+data AnonymousBitsType f a where
+ -- enum(<expr>) { <body> }
+ AnonymousEnumBody :: Expression 'Stage1 a -> f (EnumBody stage f a) -> a -> AnonymousBitsType f a
+
+data BitType (stage :: Stage) (f :: * -> *) a where
+ -- enum(<expr>) { <body> }
+ EnumBitType :: Expression stage a -> f (EnumBody stage f a) -> a -> BitType stage f a
+ -- (<expr>)
+ RawBits :: Expression stage a -> a -> BitType stage f a
+
+data EnumBody (stage :: Stage) (f :: * -> *) a where
+ -- <decl>,
+ EnumBody :: [EnumConstantDecl stage a] -> a -> EnumBody stage f a
+
+data EnumConstantDecl stage a where
+ -- <ident> = <expr>
+ EnumConstantDecl :: Identifier a -> Expression stage a -> a -> EnumConstantDecl stage a
+ -- reserved = <expr>
+ EnumConstantReserved :: Expression stage a -> a -> EnumConstantDecl stage a
+
+data PackageBody (stage :: Stage) (f :: * -> *) a where
+ {- The body of a package -}
+ PackageBody :: [FiddleDecl stage f a] -> a -> PackageBody stage f a
diff --git a/src/Language/Fiddle/Compiler.hs b/src/Language/Fiddle/Compiler.hs
new file mode 100644
index 0000000..9bc9eb9
--- /dev/null
+++ b/src/Language/Fiddle/Compiler.hs
@@ -0,0 +1,8 @@
+module Language.Fiddle.Compiler where
+
+import Language.Fiddle.Ast
+
+-- Converts a Stage1 AST to a Stage2 AST.
+compileStage2 ::
+ FiddleUnit 'Stage1 Identity Metadata -> FiddleUnit 'Stage2 Identity Metadata
+compileStage2 = undefined
diff --git a/src/Language/Fiddle/Parser.hs b/src/Language/Fiddle/Parser.hs
index 3a44b31..16489c1 100644
--- a/src/Language/Fiddle/Parser.hs
+++ b/src/Language/Fiddle/Parser.hs
@@ -1,21 +1,259 @@
+{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE ScopedTypeVariables #-}
-module Language.Fiddle.Parser where
+module Language.Fiddle.Parser
+ ( fiddleUnit,
+ parseFiddleText,
+ )
+where
-import Language.Fiddle.Types
import Data.Functor.Identity
-import Data.Text
-import Text.Parsec
+import Data.Text (Text)
+import qualified Data.Text
+import Language.Fiddle.Ast
+import Language.Fiddle.Tokenizer
+import Language.Fiddle.Types
+import Text.Parsec hiding (token)
+import qualified Text.Parsec
+
+type F = Either ParseError
+
+type S = [Token SourceSpan]
+
+type P = ParsecT S () Identity
+
+type Pa (a :: Stage -> (* -> *) -> * -> *) = P (a 'Stage1 F (Commented SourceSpan))
+
+fiddleUnit :: Pa FiddleUnit
+fiddleUnit =
+ withMeta $
+ FiddleUnit <$> many (fiddleDecl <* tok TokSemi)
+
+fiddleDecl :: Pa FiddleDecl
+fiddleDecl =
+ withMeta $
+ choice
+ [ tok KWOption >> OptionDecl <$> ident <*> ident,
+ tok KWPackage >> PackageDecl
+ <$> ident
+ <*> defer body packageBody,
+ tok KWLocation >> LocationDecl <$> ident <*> (tok TokEq >> expression),
+ tok KWBits >> BitsDecl <$> ident <*> (tok TokColon >> bitType),
+ tok KWObjtype
+ >> ObjTypeDecl <$> ident <*> (tok TokColon >> defer body objTypeBody),
+ tok KWObject
+ >> ObjectDecl
+ <$> ident
+ <*> (tok KWAt *> expression)
+ <*> (tok TokColon *> objType)
+ ]
+
+objType :: Pa ObjType
+objType = recur objType'
+ where
+ recur b = do
+ recur
+ ( try $
+ withMeta $ do
+ t <- b
+ e <- tok TokLBrace *> expression <* tok TokRBrace
+ return (ArrayObjType t e)
+ )
+ <|> b
+
+ objType' =
+ withMeta $
+ (ReferencedObjType <$> ident)
+ <|> (AnonymousObjType <$> defer body objTypeBody)
+
+exprInParen :: P (Expression Stage1 (Commented SourceSpan))
+exprInParen = tok TokLParen *> expression <* tok TokRParen
+
+objTypeBody :: Pa ObjTypeBody
+objTypeBody =
+ withMeta $
+ withinBody $
+ ObjTypeBody <$> many (objTypeDecl <* tok TokSemi)
+
+objTypeDecl :: Pa ObjTypeDecl
+objTypeDecl =
+ withMeta $
+ ( do
+ tok KWAssertPos
+ AssertPosStatement <$> exprInParen
+ )
+ <|> ( do
+ tok KWReg
+ RegisterDecl
+ <$> optionMaybe modifier
+ <*> optionMaybe ident
+ <*> exprInParen
+ <*> optionMaybe (tok TokColon *> registerBody)
+ )
+
+modifier :: P (Modifier (Commented SourceSpan))
+modifier =
+ withMeta $
+ ModifierKeyword
+ <$> choice
+ [ tok KWRo >> return Ro,
+ tok KWRw >> return Rw,
+ tok KWWo >> return Wo
+ ]
+
+registerBody :: Pa RegisterBody
+registerBody = withMeta $ RegisterBody <$> defer body deferredRegisterBody
+
+deferredRegisterBody :: Pa DeferredRegisterBody
+deferredRegisterBody =
+ withMeta $
+ withinBody $
+ DeferredRegisterBody <$> many (registerBitsDecl <* tok TokSemi)
-newtype FiddleUnit a = FiddleUnit a
+registerBitsDecl :: Pa RegisterBitsDecl
+registerBitsDecl =
+ withMeta $
+ ( do
+ tok KWReserved >> ReservedBits <$> exprInParen
+ )
+ <|> ( DefinedBits <$> optionMaybe modifier
+ <*> ident
+ <*> registerBitsTypeRef
+ )
-parseUnit :: ParsecT s u m (FiddleUnit Metadata)
-parseUnit = undefined
+registerBitsTypeRef :: Pa RegisterBitsTypeRef
+registerBitsTypeRef = recur typeRef
+ where
+ recur b = do
+ recur
+ ( try $
+ withMeta $ do
+ t <- b
+ e <- tok TokLBrace *> expression <* tok TokRBrace
+ return (RegisterBitsArray t e)
+ )
+ <|> b
+
+ typeRef =
+ withMeta $
+ (tok KWEnum >> RegisterBitsAnonymousType <$> anonymousBitsType)
+ <|> (RegisterBitsReference <$> ident)
+
+anonymousBitsType :: P (AnonymousBitsType F (Commented SourceSpan))
+anonymousBitsType = withMeta $ do
+ tok KWEnum
+ AnonymousEnumBody <$> exprInParen <*> defer body enumBody
+
+bitType :: Pa BitType
+bitType = withMeta $ rawBits <|> enumType
+ where
+ rawBits = RawBits <$> (tok TokLParen *> expression <* tok TokRParen)
+ enumType = do
+ tok KWEnum
+ expr <- exprInParen
+ EnumBitType expr <$> defer body enumBody
+
+enumBody :: Pa EnumBody
+enumBody =
+ withMeta $
+ withinBody $
+ EnumBody <$> many enumConstantDecl
+
+enumConstantDecl :: P (EnumConstantDecl Stage1 (Commented SourceSpan))
+enumConstantDecl =
+ withMeta $
+ (tok KWReserved >> EnumConstantReserved <$> exprInParen)
+ <|> (EnumConstantDecl <$> ident <*> (tok TokEq >> expression))
+
+expression :: P (Expression 'Stage1 (Commented SourceSpan))
+expression = withMeta $
+ token $ \case
+ (TokLitNum num) -> Just (LitNum num)
+ (TokIdent i) -> Just $
+ \(Commented cs s) -> Var (Identifier i (Commented [] s)) (Commented cs s)
+ _ -> Nothing
+
+body :: P [Token SourceSpan]
+body = do
+ t0 <- tok TokLBrace
+ ret <-
+ concat
+ <$> manyTill
+ ( body <|> fmap (: []) anyToken
+ )
+ (lookAhead $ tok TokRBrace)
+ return $ t0 : ret
+
+-- Parses something within braces.
+withinBody :: P a -> P a
+withinBody p = tok TokLBrace *> p <* tok TokRBrace
+
+-- A deferred parsing takes a part of a text file (such as a body) and returns a
+-- deferred computation for parsing that section.
+--
+-- This is useful because it allows for parse errors to be detected in multiple
+-- locations. This is because for things like bodies (stuff inside { ... }), we
+-- can parse the stuff inside the body as it's own, separate parsing.
+defer :: P [Token SourceSpan] -> P b -> P (F b)
+defer p0 pb = do
+ contents <- p0
+ sourcePos <- getPosition
-parseText :: String -> Text -> Either String (FiddleUnit Metadata)
-parseText sourceName txt = runIdentity $ do
- res <- Text.Parsec.runParserT parseUnit () sourceName []
return $
- case res of
- Left pe -> Left (show pe)
- Right a -> Right a
+ Text.Parsec.runParser
+ ( do
+ setPosition sourcePos
+ pb <* eof
+ )
+ ()
+ (sourceName sourcePos)
+ contents
+
+packageBody :: Pa PackageBody
+packageBody =
+ withMeta $
+ withinBody $
+ PackageBody <$> many (fiddleDecl <* tok TokSemi)
+
+ident :: P (Identifier (Commented SourceSpan))
+ident =
+ withMeta $
+ token $ \case
+ (TokIdent id) -> Just (Identifier id)
+ _ -> Nothing
+
+-- Takes a some parsable thing p and automatically parses the comments before
+-- and after and sets the positions and adds it to the annotation.
+withMeta :: P (Commented SourceSpan -> b) -> P b
+withMeta p = try $ do
+ comments <- many comment
+ (Token _ (SourceSpan start _)) <- lookAhead anyToken
+ fn <- p
+ (Token _ (SourceSpan end _)) <- lookAhead anyToken
+
+ return $ fn (Commented comments (SourceSpan start end))
+ where
+ comment :: P Comment
+ comment =
+ token $ \case
+ (TokComment c) -> Just (NormalComment c)
+ (TokDocComment c) -> Just (DocComment c)
+ _ -> Nothing
+
+token :: (T -> Maybe a) -> ParsecT S u Identity a
+token fn =
+ Text.Parsec.token
+ (\(Token t _) -> show t)
+ (\(Token _ (SourceSpan s1 _)) -> s1)
+ (\(Token t _) -> fn t)
+
+tok :: T -> P (Token SourceSpan)
+tok t' =
+ Text.Parsec.token
+ (\(Token t _) -> show t)
+ (\(Token _ (SourceSpan s1 _)) -> s1)
+ (\tok@(Token t _) -> if t == t' then Just tok else Nothing)
+
+parseFiddleText :: String -> Text -> F (FiddleUnit 'Stage1 F (Commented SourceSpan))
+parseFiddleText sourceName txt = runIdentity $ do
+ Text.Parsec.runParserT fiddleUnit () sourceName []
diff --git a/src/Language/Fiddle/Tokenizer.hs b/src/Language/Fiddle/Tokenizer.hs
index d3239fd..ec41042 100644
--- a/src/Language/Fiddle/Tokenizer.hs
+++ b/src/Language/Fiddle/Tokenizer.hs
@@ -1,29 +1,34 @@
{-# LANGUAGE DeriveFunctor #-}
+{-# LANGUAGE OverloadedStrings #-}
+
module Language.Fiddle.Tokenizer where
import Data.Char (isDigit)
import Data.Text (Text)
+import qualified Data.Text
import Language.Fiddle.Types
import Text.Parsec
import qualified Text.Parsec
data T
= KWAssertPos
- | Ident !String
+ | TokIdent !Text
| KWAt
- | KWBittype
+ | KWBits
| KWEnum
- | CommentTok !String
- | DocCommentTok !String
+ | TokComment !Text
+ | TokDocComment !Text
| KWLocation
| KWObject
| KWObjtype
| KWOption
| KWPackage
| KWReg
+ | KWReserved
| KWRo
| KWWo
- | LitNum !String
+ | KWRw
+ | TokLitNum !Text
| TokColon
| TokComma
| TokEq
@@ -48,10 +53,11 @@ parseToken = spaces *> tok parseToken' <* spaces
Token t . SourceSpan p1 <$> getPosition
+ parseAlNumTok :: Text -> T
parseAlNumTok str =
case str of
"at" -> KWAt
- "bittype" -> KWBittype
+ "bits" -> KWBits
"enum" -> KWEnum
"location" -> KWLocation
"object" -> KWObject
@@ -61,19 +67,22 @@ parseToken = spaces *> tok parseToken' <* spaces
"reg" -> KWReg
"ro" -> KWRo
"wo" -> KWWo
- (h : _) | isDigit h -> LitNum str
- ident -> Ident ident
+ "rw" -> KWRw
+ "reserved" -> KWReserved
+ "assert_pos" -> KWAssertPos
+ (Data.Text.head -> h) | isDigit h -> TokLitNum str
+ ident -> TokIdent ident
parseComment =
try
( do
string "//"
- CommentTok <$> manyTill anyChar (char '\n')
+ TokComment . Data.Text.pack <$> manyTill anyChar (char '\n')
)
<|> try
( do
string "/**"
- DocCommentTok <$> manyTill anyChar (try $ string "*/")
+ TokDocComment . Data.Text.pack <$> manyTill anyChar (try $ string "*/")
)
parseSymbol =
@@ -93,7 +102,7 @@ parseToken = spaces *> tok parseToken' <* spaces
a $> b = a >> return b
parseToken' =
- fmap parseAlNumTok (many1 (alphaNum <|> char '_'))
+ fmap (parseAlNumTok . Data.Text.pack) (many1 (alphaNum <|> char '_'))
<|> parseComment
<|> parseSymbol
diff --git a/src/Language/Fiddle/Types.hs b/src/Language/Fiddle/Types.hs
index c83bef2..b712c7a 100644
--- a/src/Language/Fiddle/Types.hs
+++ b/src/Language/Fiddle/Types.hs
@@ -3,7 +3,7 @@ module Language.Fiddle.Types where
import Text.Parsec (SourcePos)
import Data.Text (Text)
-newtype Comment = Comment Text
+data Comment = NormalComment Text | DocComment Text
data SourceSpan = SourceSpan
{ sourceStart :: !SourcePos,
@@ -11,4 +11,4 @@ data SourceSpan = SourceSpan
}
deriving (Eq, Ord, Show)
-data Metadata = Metadata !SourceSpan !Comment
+data Commented a = Commented ![Comment] !a