summaryrefslogtreecommitdiff
path: root/src/Language/Fiddle/Tokenizer.hs
blob: 4e06b9241334637647f42c9705527417df974f84 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE OverloadedStrings #-}

module Language.Fiddle.Tokenizer where

import Data.Char (isDigit)
import Data.Text (Text)
import qualified Data.Text
import Language.Fiddle.Types
import Text.Parsec
import qualified Text.Parsec

data T
  = KWAssertPos
  | TokIdent !Text
  | KWAt
  | KWBits
  | KWEnum
  | TokComment !Text
  | TokDocComment !Text
  | KWLocation
  | KWObject
  | KWObjtype
  | KWOption
  | KWPackage
  | KWReg
  | KWReserved
  | KWRo
  | KWWo
  | KWRw
  | TokLitNum !Text
  | TokColon
  | TokComma
  | TokEq
  | TokLBrace
  | TokLBracket
  | TokLParen
  | TokRBrace
  | TokRBracket
  | TokRParen
  | TokSemi
  deriving (Eq, Ord, Show, Read)

data Token a = Token !T a
  deriving (Eq, Ord, Show, Functor)

parseToken :: (Monad m) => ParsecT Text u m (Token SourceSpan)
parseToken = spaces *> tok parseToken' <* spaces
  where
    tok tp = do
      p1 <- getPosition
      t <- tp

      Token t . SourceSpan p1 <$> getPosition

    parseAlNumTok :: Text -> T
    parseAlNumTok str =
      case str of
        "at" -> KWAt
        "bits" -> KWBits
        "enum" -> KWEnum
        "location" -> KWLocation
        "object" -> KWObject
        "objtype" -> KWObjtype
        "option" -> KWOption
        "package" -> KWPackage
        "reg" -> KWReg
        "ro" -> KWRo
        "wo" -> KWWo
        "rw" -> KWRw
        "reserved" -> KWReserved
        "assert_pos" -> KWAssertPos
        (Data.Text.head -> h) | isDigit h -> TokLitNum str
        ident -> TokIdent ident

    parseComment =
      try
        ( do
            string "//"
            TokComment . Data.Text.pack <$> manyTill anyChar (char '\n')
        )
        <|> try
          ( do
              string "/**"
              TokDocComment . Data.Text.pack <$> manyTill anyChar (try $ string "*/")
          )

    parseSymbol =
      choice
        [ char ':' $> TokColon,
          char ',' $> TokComma,
          char '=' $> TokEq,
          char '{' $> TokLBrace,
          char '[' $> TokLBracket,
          char '(' $> TokLParen,
          char '}' $> TokRBrace,
          char ']' $> TokRBracket,
          char ')' $> TokRParen,
          char ';' $> TokSemi
        ]
      where
        a $> b = a >> return b

    parseToken' =
      fmap (parseAlNumTok . Data.Text.pack) (many1 (alphaNum <|> char '_'))
        <|> parseComment
        <|> parseSymbol

tokenize :: String -> Text -> Either ParseError [Token SourceSpan]
tokenize = Text.Parsec.runParser (many parseToken <* eof) ()

tokenType :: Token SourceSpan -> T
tokenType (Token t _) = t