{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ViewPatterns #-}

module Language.Fiddle.Tokenizer where

import Data.Char (isDigit)
import Data.Text (Text)
import qualified Data.Text
import Language.Fiddle.Types
import Text.Parsec

-- | Raw token kinds produced by the tokenizer.
data T
  = KWAssertPos
  | KWAt
  | KWBits
  | KWEnum
  | KWInstance
  | KWLocation
  | KWOption
  | KWPackage
  | KWReg
  | KWReserved
  | KWRo
  | KWRw
  | KWStruct
  | KWType
  | KWUnion
  | KWWo
  | KWImport
  | KWUsing
  | TokColon
  | TokComma
  | TokDot
  | TokComment !Text
  | TokDocComment !Text
  | TokEq
  | TokIdent !Text
  | TokLBrace
  | TokLBracket
  | TokLParen
  | TokLitNum !Text
  | TokRBrace
  | TokRBracket
  | TokRParen
  | TokSemi
  | TokString !Text
  | TokDirectiveStart -- [[
  | TokDirectiveEnd -- ]]
  deriving (Eq, Ord, Show, Read)

-- | The source text of a keyword, identifier or number literal, if any.
textOf :: T -> Maybe Text
textOf t =
  case t of
    KWAssertPos -> Just "assert_pos"
    KWAt -> Just "at"
    KWBits -> Just "bits"
    KWEnum -> Just "enum"
    KWInstance -> Just "instance"
    KWLocation -> Just "location"
    KWOption -> Just "option"
    KWPackage -> Just "package"
    KWReg -> Just "reg"
    KWReserved -> Just "reserved"
    KWRo -> Just "ro"
    KWRw -> Just "rw"
    KWStruct -> Just "struct"
    KWType -> Just "type"
    KWUnion -> Just "union"
    KWWo -> Just "wo"
    KWImport -> Just "import"
    KWUsing -> Just "using"
    TokIdent i -> Just i
    TokLitNum n -> Just n
    _ -> Nothing

-- | A token kind annotated with position information (typically 'SourceSpan').
data Token a = Token !T a
  deriving (Eq, Ord, Show, Functor)

-- | Parse a single token, skipping surrounding whitespace and recording the
-- source span it covers.
parseToken :: (Monad m) => ParsecT Text u m (Token SourceSpan)
parseToken = spaces *> tok parseToken' <* spaces
  where
    tok tp = do
      p1 <- getPosition
      t <- tp
      Token t . SourceSpan p1 <$> getPosition

-- | Classify an alphanumeric run as a keyword, a number literal (if it starts
-- with a digit), or an identifier.
parseAlNumTok :: Text -> T
parseAlNumTok str =
  case str of
    "at" -> KWAt
    "bits" -> KWBits
    "enum" -> KWEnum
    "location" -> KWLocation
    "instance" -> KWInstance
    "type" -> KWType
    "option" -> KWOption
    "package" -> KWPackage
    "import" -> KWImport
    "reg" -> KWReg
    "ro" -> KWRo
    "wo" -> KWWo
    "rw" -> KWRw
    "reserved" -> KWReserved
    "union" -> KWUnion
    "using" -> KWUsing
    "struct" -> KWStruct
    "assert_pos" -> KWAssertPos
    (Data.Text.head -> h) | isDigit h -> TokLitNum str
    ident -> TokIdent ident

-- | Parse a double-quoted string literal. Backslash escapes are kept verbatim.
parseString :: (Monad m) => ParsecT Text u m T
parseString = fmap (TokString . Data.Text.pack . concat) $ do
  _ <- char '"'
  manyTill
    ( do
        c <- anyChar
        if c == '\\'
          then do
            c2 <- anyChar
            return [c, c2]
          else return [c]
    )
    (char '"')

-- | Parse a line comment (@//@) or a doc comment (@/** ... */@).
parseComment :: (Monad m) => ParsecT Text u m T
parseComment =
  try
    ( do
        _ <- string "//"
        TokComment . Data.Text.pack <$> manyTill anyChar (char '\n')
    )
    <|> try
      ( do
          _ <- string "/**"
          TokDocComment . Data.Text.pack <$> manyTill anyChar (try $ string "*/")
      )

-- | Parse punctuation and the directive brackets @[[@ / @]]@.
parseSymbol :: (Monad m) => ParsecT Text u m T
parseSymbol =
  choice
    [ try (string "[[" $> TokDirectiveStart),
      try (string "]]" $> TokDirectiveEnd),
      char ':' $> TokColon,
      char ',' $> TokComma,
      char '.' $> TokDot,
      char '=' $> TokEq,
      char '{' $> TokLBrace,
      char '[' $> TokLBracket,
      char '(' $> TokLParen,
      char '}' $> TokRBrace,
      char ']' $> TokRBracket,
      char ')' $> TokRParen,
      char ';' $> TokSemi
    ]
  where
    a $> b = a >> return b

parseToken' :: (Monad m) => ParsecT Text u m T
parseToken' =
  fmap (parseAlNumTok . Data.Text.pack) (many1 (alphaNum <|> char '_'))
    <|> parseString
    <|> parseComment
    <|> parseSymbol

-- | Tokenize a full input. The 'String' argument names the source for error
-- messages.
tokenize :: String -> Text -> Either ParseError [Token SourceSpan]
tokenize = Text.Parsec.runParser (many parseToken <* eof) ()

tokenType :: Token SourceSpan -> T
tokenType (Token t _) = t
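
-- A minimal usage sketch: strip plain line comments from the token stream
-- before handing it to a parser. The name 'tokenizeNoComments' is a
-- hypothetical helper added for illustration, not an established part of this
-- module's API; doc comments are kept, on the assumption that a later stage
-- may attach them to declarations.
tokenizeNoComments :: String -> Text -> Either ParseError [Token SourceSpan]
tokenizeNoComments sourceName contents =
  filter (not . isPlainComment . tokenType) <$> tokenize sourceName contents
  where
    isPlainComment (TokComment _) = True
    isPlainComment _ = False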