diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/Language/Fiddle/Tokenizer.hs | 128 | ||||
-rw-r--r-- | src/Language/Fiddle/Types.hs | 6 | ||||
-rw-r--r-- | src/Main.hs | 17 |
3 files changed, 116 insertions, 35 deletions
diff --git a/src/Language/Fiddle/Tokenizer.hs b/src/Language/Fiddle/Tokenizer.hs
index 9931523..d3239fd 100644
--- a/src/Language/Fiddle/Tokenizer.hs
+++ b/src/Language/Fiddle/Tokenizer.hs
@@ -1,37 +1,101 @@
+{-# LANGUAGE DeriveFunctor #-}
 module Language.Fiddle.Tokenizer where
 
+import Data.Char (isDigit)
+import Data.Text (Text)
 import Language.Fiddle.Types
+import Text.Parsec
+import qualified Text.Parsec
 
 data T
-  = KW_assert_pos
-  | KW_at
-  | KW_bittype
-  | KW_enum
-  | KW_location
-  | KW_object
-  | KW_objtype
-  | KW_option
-  | KW_package
-  | KW_reg
-  | KW_ro
-  | KW_wo
-  | Tok_colon
-  | Tok_comma
-  | Tok_comment
-  | Tok_docComment
-  | Tok_eq
-  | Tok_ident String
-  | Tok_lbrace
-  | Tok_lbracket
-  | Tok_litnum String
-  | Tok_lparen
-  | Tok_package
-  | Tok_rbrace
-  | Tok_rbracket
-  | Tok_rparen
-  | Tok_semi
-
-data Token = Token T SourceSpan
-
-tokenize :: String -> Text -> [Token]
-tokenize srcName txt = undefined
+  = KWAssertPos
+  | Ident !String
+  | KWAt
+  | KWBittype
+  | KWEnum
+  | CommentTok !String
+  | DocCommentTok !String
+  | KWLocation
+  | KWObject
+  | KWObjtype
+  | KWOption
+  | KWPackage
+  | KWReg
+  | KWRo
+  | KWWo
+  | LitNum !String
+  | TokColon
+  | TokComma
+  | TokEq
+  | TokLBrace
+  | TokLBracket
+  | TokLParen
+  | TokRBrace
+  | TokRBracket
+  | TokRParen
+  | TokSemi
+  deriving (Eq, Ord, Show, Read)
+
+data Token a = Token !T a
+  deriving (Eq, Ord, Show, Functor)
+
+parseToken :: (Monad m) => ParsecT Text u m (Token SourceSpan)
+parseToken = spaces *> tok parseToken' <* spaces
+  where
+    tok tp = do
+      p1 <- getPosition
+      t <- tp
+
+      Token t . SourceSpan p1 <$> getPosition
+
+    parseAlNumTok str =
+      case str of
+        "at" -> KWAt
+        "bittype" -> KWBittype
+        "enum" -> KWEnum
+        "location" -> KWLocation
+        "object" -> KWObject
+        "objtype" -> KWObjtype
+        "option" -> KWOption
+        "package" -> KWPackage
+        "reg" -> KWReg
+        "ro" -> KWRo
+        "wo" -> KWWo
+        (h : _) | isDigit h -> LitNum str
+        ident -> Ident ident
+
+    parseComment =
+      try
+        ( do
+            string "//"
+            CommentTok <$> manyTill anyChar (char '\n')
+        )
+        <|> try
+          ( do
+              string "/**"
+              DocCommentTok <$> manyTill anyChar (try $ string "*/")
+          )
+
+    parseSymbol =
+      choice
+        [ char ':' $> TokColon,
+          char ',' $> TokComma,
+          char '=' $> TokEq,
+          char '{' $> TokLBrace,
+          char '[' $> TokLBracket,
+          char '(' $> TokLParen,
+          char '}' $> TokRBrace,
+          char ']' $> TokRBracket,
+          char ')' $> TokRParen,
+          char ';' $> TokSemi
+        ]
+      where
+        a $> b = a >> return b
+
+    parseToken' =
+      fmap parseAlNumTok (many1 (alphaNum <|> char '_'))
+        <|> parseComment
+        <|> parseSymbol
+
+tokenize :: String -> Text -> Either ParseError [Token SourceSpan]
+tokenize = Text.Parsec.runParser (many parseToken <* eof) ()
diff --git a/src/Language/Fiddle/Types.hs b/src/Language/Fiddle/Types.hs
index 62538d8..c83bef2 100644
--- a/src/Language/Fiddle/Types.hs
+++ b/src/Language/Fiddle/Types.hs
@@ -1,6 +1,7 @@
 module Language.Fiddle.Types where
 
-import Text.Parsec (SourceSpan)
+import Text.Parsec (SourcePos)
+import Data.Text (Text)
 
 newtype Comment = Comment Text
 
@@ -8,5 +9,6 @@ data SourceSpan = SourceSpan
   { sourceStart :: !SourcePos,
     sourceStop :: !SourcePos
   }
+  deriving (Eq, Ord, Show)
 
-data Metadata = Metadata SourceSpan Comment
+data Metadata = Metadata !SourceSpan !Comment
diff --git a/src/Main.hs b/src/Main.hs
index 9249638..226182a 100644
--- a/src/Main.hs
+++ b/src/Main.hs
@@ -1,4 +1,19 @@
 module Main where
 
+import qualified Language.Fiddle.Tokenizer
+import qualified Data.Text.IO
+import qualified System.Environment as System
+import Control.Monad (forM_)
+
 main :: IO ()
-main = putStrLn "Hello, World"
+main = do
+  argv <- System.getArgs
+
+  case argv of
+    [filePath] -> do
+      text <- Data.Text.IO.readFile filePath
+      case Language.Fiddle.Tokenizer.tokenize filePath text of
+        Left pe -> putStrLn $ "Parse Error: " ++ show pe
+        Right lst -> forM_ lst $ \(Language.Fiddle.Tokenizer.Token t _) -> print t
+
+    _ -> putStrLn "Wrong Args"