module Identifier ( Identifier (..), Namespace (..), parseValid, ) where import Data.Text qualified as Text import Text.Parsec qualified as Parsec import Control.Monad (guard) import Data.Char (isAlpha, isAlphaNum) import Data.Functor (($>)) import Data.Maybe (listToMaybe, maybeToList) import Data.Text (Text) import ParserMonad import Text.Parsec (State (..)) import Text.Parsec.Pos (updatePosChar) import Text.Read.Lex (isSymbolChar) -- | Identifier string surrounded with namespace, opening, and closing quotes/backticks. data Identifier = Identifier !Namespace !Char String !Char deriving (Show, Eq) -- | The namespace qualification for an identifier. data Namespace = Value | Type | None deriving (Eq, Ord, Enum, Show) parseValid :: Parser Identifier parseValid = do state@State{stateInput = input, statePos = pos} <- Parsec.getParserState case takeIdentifier input of Nothing -> Parsec.parserFail "parseValid: Failed to match a valid identifier" Just (namespace, op, ident, cl, input') -> let posOp = updatePosChar pos op posIdent = Text.foldl updatePosChar posOp ident posCl = updatePosChar posIdent cl newState = state{stateInput = input', statePos = posCl} in Parsec.setParserState newState $> Identifier namespace op (Text.unpack ident) cl {- | Try to parse a delimited identifier off the front of the given input. This tries to match as many valid Haskell identifiers/operators as possible, to the point of sometimes accepting invalid things (ex: keywords). Some considerations: - operators and identifiers can have module qualifications - operators can be wrapped in parens (for prefix) - identifiers can be wrapped in backticks (for infix) - delimiters are backticks or regular ticks - since regular ticks are also valid in identifiers, we opt for the longest successful parse This function should make /O(1)/ allocations -} takeIdentifier :: Text -> Maybe (Namespace, Char, Text, Char, Text) takeIdentifier input = listToMaybe $ do -- Optional namespace let (namespace, input') = case Text.uncons input of Just ('v', i) -> (Value, i) Just ('t', i) -> (Type, i) _ -> (None, input) -- Opening tick (op, input'') <- maybeToList (Text.uncons input') guard (op == '\'' || op == '`') -- Identifier/operator (ident, input''') <- wrapped input'' -- Closing tick (cl, input'''') <- maybeToList (Text.uncons input''') guard (cl == '\'' || cl == '`') pure (namespace, op, ident, cl, input'''') where -- \| Parse out a wrapped, possibly qualified, operator or identifier wrapped t = do (c, t') <- maybeToList (Text.uncons t) -- Tuples case c of '(' | Just (c', _) <- Text.uncons t' , c' == ',' || c' == ')' -> do let (commas, t'') = Text.span (== ',') t' (')', t''') <- maybeToList (Text.uncons t'') pure (Text.take (Text.length commas + 2) t, t''') -- Parenthesized '(' -> do (n, t'') <- general False 0 [] t' (')', t''') <- maybeToList (Text.uncons t'') pure (Text.take (n + 2) t, t''') -- Backticked '`' -> do (n, t'') <- general False 0 [] t' ('`', t''') <- maybeToList (Text.uncons t'') pure (Text.take (n + 2) t, t''') -- Unadorned _ -> do (n, t'') <- general False 0 [] t pure (Text.take n t, t'') -- \| Parse out a possibly qualified operator or identifier general :: Bool -> -- \^ refuse inputs starting with operators Int -> -- \^ total characters \"consumed\" so far [(Int, Text)] -> -- \^ accumulated results Text -> -- \^ current input [(Int, Text)] -- \^ total characters parsed & what remains general !identOnly !i acc t -- Starts with an identifier (either just an identifier, or a module qual) | Just (n, rest) <- identLike t = if Text.null rest then acc else case Text.head rest of '`' -> (n + i, rest) : acc ')' -> (n + i, rest) : acc '.' -> general False (n + i + 1) acc (Text.tail rest) '\'' -> let (m, rest') = quotes rest in general True (n + m + 1 + i) ((n + m + i, rest') : acc) (Text.tail rest') _ -> acc -- An operator | Just (n, rest) <- optr t , not identOnly = (n + i, rest) : acc -- Anything else | otherwise = acc -- \| Parse an identifier off the front of the input identLike t | Text.null t = Nothing | isAlpha (Text.head t) || '_' == Text.head t = let !(idt, rest) = Text.span (\c -> isAlphaNum c || c == '_') t !(octos, rest') = Text.span (== '#') rest in Just (Text.length idt + Text.length octos, rest') | otherwise = Nothing -- \| Parse all but the last quote off the front of the input -- PRECONDITION: T.head t `elem` ['\'', '`'] quotes :: Text -> (Int, Text) quotes t = let !n = Text.length (Text.takeWhile (`elem` ['\'', '`']) t) - 1 in (n, Text.drop n t) -- \| Parse an operator off the front of the input optr t = let !(op, rest) = Text.span isSymbolChar t in if Text.null op then Nothing else Just (Text.length op, rest)