summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/Language/Fiddle/Tokenizer.hs | 128
-rw-r--r-- src/Language/Fiddle/Types.hs | 6
-rw-r--r-- src/Main.hs | 17
3 files changed, 116 insertions, 35 deletions
diff --git a/src/Language/Fiddle/Tokenizer.hs b/src/Language/Fiddle/Tokenizer.hs
index 9931523..d3239fd 100644
--- a/src/Language/Fiddle/Tokenizer.hs
+++ b/src/Language/Fiddle/Tokenizer.hs
@@ -1,37 +1,101 @@
+{-# LANGUAGE DeriveFunctor #-}
module Language.Fiddle.Tokenizer where
+import Data.Char (isDigit)
+import Data.Text (Text)
import Language.Fiddle.Types
+import Text.Parsec
+import qualified Text.Parsec
+-- | The kinds of token the tokenizer can produce: keywords (@KW*@),
+-- identifiers, numeric literals, comments and punctuation (@Tok*@).
+-- Constructors that carry a 'String' hold the token's text.
data T
- = KW_assert_pos
- | KW_at
- | KW_bittype
- | KW_enum
- | KW_location
- | KW_object
- | KW_objtype
- | KW_option
- | KW_package
- | KW_reg
- | KW_ro
- | KW_wo
- | Tok_colon
- | Tok_comma
- | Tok_comment
- | Tok_docComment
- | Tok_eq
- | Tok_ident String
- | Tok_lbrace
- | Tok_lbracket
- | Tok_litnum String
- | Tok_lparen
- | Tok_package
- | Tok_rbrace
- | Tok_rbracket
- | Tok_rparen
- | Tok_semi
-
-data Token = Token T SourceSpan
-
-tokenize :: String -> Text -> [Token]
-tokenize srcName txt = undefined
+ = KWAssertPos
+ | Ident !String
+ | KWAt
+ | KWBittype
+ | KWEnum
+ | CommentTok !String
+ | DocCommentTok !String
+ | KWLocation
+ | KWObject
+ | KWObjtype
+ | KWOption
+ | KWPackage
+ | KWReg
+ | KWRo
+ | KWWo
+ | LitNum !String
+ | TokColon
+ | TokComma
+ | TokEq
+ | TokLBrace
+ | TokLBracket
+ | TokLParen
+ | TokRBrace
+ | TokRBracket
+ | TokRParen
+ | TokSemi
+ deriving (Eq, Ord, Show, Read)
+
+-- | A token kind paired with an annotation of type @a@. 'parseToken'
+-- produces tokens annotated with a 'SourceSpan'.
+data Token a = Token !T a
+ deriving (Eq, Ord, Show, Functor)
+
+-- | Parse a single token together with the source span it covers.
+--
+-- Whitespace before and after the token is skipped with 'spaces'; the span
+-- is measured from just before the token parser runs to just after it
+-- finishes. Fails when the next input is not a word, a comment or one of
+-- the punctuation symbols.
+parseToken :: (Monad m) => ParsecT Text u m (Token SourceSpan)
+parseToken = spaces *> tok parseToken' <* spaces
+  where
+    -- Run a token parser and wrap its result with the start/end positions.
+    tok tp = do
+      p1 <- getPosition
+      t <- tp
+
+      Token t . SourceSpan p1 <$> getPosition
+
+    -- Classify a run of alphanumerics/underscores as a keyword, a numeric
+    -- literal (anything starting with a digit) or an identifier.
+    parseAlNumTok str =
+      case str of
+        -- NOTE(review): spelling inferred from the former KW_assert_pos
+        -- constructor; without this entry KWAssertPos is never produced.
+        "assert_pos" -> KWAssertPos
+        "at" -> KWAt
+        "bittype" -> KWBittype
+        "enum" -> KWEnum
+        "location" -> KWLocation
+        "object" -> KWObject
+        "objtype" -> KWObjtype
+        "option" -> KWOption
+        "package" -> KWPackage
+        "reg" -> KWReg
+        "ro" -> KWRo
+        "wo" -> KWWo
+        (h : _) | isDigit h -> LitNum str
+        ident -> Ident ident
+
+    -- Only the opening delimiter is wrapped in 'try', so a lone '/' still
+    -- falls through to the other alternatives without consuming input.
+    parseComment =
+      (try (string "//") *> (CommentTok <$> manyTill anyChar lineEnd))
+        <|> ( try (string "/**")
+                *> (DocCommentTok <$> manyTill anyChar (try (string "*/")))
+            )
+
+    -- A line comment ends at a newline (which is consumed) or at end of
+    -- input, so a comment on the last line without a trailing newline
+    -- still lexes.
+    lineEnd = (() <$ char '\n') <|> eof
+
+    -- Single-character punctuation tokens.
+    parseSymbol =
+      choice
+        [ TokColon <$ char ':',
+          TokComma <$ char ',',
+          TokEq <$ char '=',
+          TokLBrace <$ char '{',
+          TokLBracket <$ char '[',
+          TokLParen <$ char '(',
+          TokRBrace <$ char '}',
+          TokRBracket <$ char ']',
+          TokRParen <$ char ')',
+          TokSemi <$ char ';'
+        ]
+
+    -- Words first, then comments, then punctuation.
+    parseToken' =
+      fmap parseAlNumTok (many1 (alphaNum <|> char '_'))
+        <|> parseComment
+        <|> parseSymbol
+
+-- | Tokenize an entire input.
+--
+-- The first argument is the source name used in error positions, the second
+-- is the text to tokenize. Returns every token up to end of input, or the
+-- 'ParseError' for the first piece of input that is not a valid token.
+--
+-- Leading whitespace is skipped up front so that input consisting only of
+-- whitespace yields an empty token list instead of a parse error.
+tokenize :: String -> Text -> Either ParseError [Token SourceSpan]
+tokenize = Text.Parsec.runParser (spaces *> many parseToken <* eof) ()
diff --git a/src/Language/Fiddle/Types.hs b/src/Language/Fiddle/Types.hs
index 62538d8..c83bef2 100644
--- a/src/Language/Fiddle/Types.hs
+++ b/src/Language/Fiddle/Types.hs
@@ -1,6 +1,7 @@
module Language.Fiddle.Types where
-import Text.Parsec (SourceSpan)
+import Text.Parsec (SourcePos)
+import Data.Text (Text)
+-- | A comment, stored as 'Text'.
newtype Comment = Comment Text
@@ -8,5 +9,6 @@ data SourceSpan = SourceSpan
{ sourceStart :: !SourcePos,
sourceStop :: !SourcePos
}
+ deriving (Eq, Ord, Show)
-data Metadata = Metadata SourceSpan Comment
+-- | A 'SourceSpan' paired with a 'Comment'; both fields are strict.
+data Metadata = Metadata !SourceSpan !Comment
diff --git a/src/Main.hs b/src/Main.hs
index 9249638..226182a 100644
--- a/src/Main.hs
+++ b/src/Main.hs
@@ -1,4 +1,19 @@
module Main where
+import Control.Monad (forM_)
+import qualified Data.Text.IO
+import qualified Language.Fiddle.Tokenizer
+import qualified System.Environment as System
+import qualified System.Exit
+
+-- | Tokenize the file named on the command line and print one token per line.
+--
+-- Expects exactly one argument, the path of the file to read. A wrong
+-- argument count or a parse error is reported on stderr and the process
+-- exits with a failure status, so callers can detect the failure.
main :: IO ()
-main = putStrLn "Hello, World"
+main = do
+  argv <- System.getArgs
+
+  case argv of
+    [filePath] -> do
+      text <- Data.Text.IO.readFile filePath
+      case Language.Fiddle.Tokenizer.tokenize filePath text of
+        Left pe -> System.Exit.die ("Parse Error: " ++ show pe)
+        Right lst -> forM_ lst $ \(Language.Fiddle.Tokenizer.Token t _) -> print t
+
+    _ -> System.Exit.die "Wrong Args"