1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
|
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE OverloadedStrings #-}
module Language.Fiddle.Tokenizer where
import Data.Char (isDigit)
import Data.Text (Text)
import qualified Data.Text
import Language.Fiddle.Types
import Text.Parsec
import qualified Text.Parsec
data T
= KWAssertPos
| TokIdent !Text
| KWAt
| KWBits
| KWEnum
| TokComment !Text
| TokDocComment !Text
| KWLocation
| KWObject
| KWObjtype
| KWOption
| KWPackage
| KWReg
| KWReserved
| KWRo
| KWWo
| KWRw
| TokLitNum !Text
| TokColon
| TokComma
| TokEq
| TokLBrace
| TokLBracket
| TokLParen
| TokRBrace
| TokRBracket
| TokRParen
| TokSemi
deriving (Eq, Ord, Show, Read)
data Token a = Token !T a
deriving (Eq, Ord, Show, Functor)
parseToken :: (Monad m) => ParsecT Text u m (Token SourceSpan)
parseToken = spaces *> tok parseToken' <* spaces
where
tok tp = do
p1 <- getPosition
t <- tp
Token t . SourceSpan p1 <$> getPosition
parseAlNumTok :: Text -> T
parseAlNumTok str =
case str of
"at" -> KWAt
"bits" -> KWBits
"enum" -> KWEnum
"location" -> KWLocation
"object" -> KWObject
"objtype" -> KWObjtype
"option" -> KWOption
"package" -> KWPackage
"reg" -> KWReg
"ro" -> KWRo
"wo" -> KWWo
"rw" -> KWRw
"reserved" -> KWReserved
"assert_pos" -> KWAssertPos
(Data.Text.head -> h) | isDigit h -> TokLitNum str
ident -> TokIdent ident
parseComment =
try
( do
string "//"
TokComment . Data.Text.pack <$> manyTill anyChar (char '\n')
)
<|> try
( do
string "/**"
TokDocComment . Data.Text.pack <$> manyTill anyChar (try $ string "*/")
)
parseSymbol =
choice
[ char ':' $> TokColon,
char ',' $> TokComma,
char '=' $> TokEq,
char '{' $> TokLBrace,
char '[' $> TokLBracket,
char '(' $> TokLParen,
char '}' $> TokRBrace,
char ']' $> TokRBracket,
char ')' $> TokRParen,
char ';' $> TokSemi
]
where
a $> b = a >> return b
parseToken' =
fmap (parseAlNumTok . Data.Text.pack) (many1 (alphaNum <|> char '_'))
<|> parseComment
<|> parseSymbol
tokenize :: String -> Text -> Either ParseError [Token SourceSpan]
tokenize = Text.Parsec.runParser (many parseToken <* eof) ()
tokenType :: Token SourceSpan -> T
tokenType (Token t _) = t
|