This commit is contained in:
Igor Ranieri 2025-09-21 08:19:01 +02:00
commit c9f61c4e06
16 changed files with 1366 additions and 0 deletions

157
src/Identifier.hs Normal file
View file

@ -0,0 +1,157 @@
module Identifier (
Identifier (..),
Namespace (..),
parseValid,
) where
import Data.Text qualified as Text
import Text.Parsec qualified as Parsec
import Control.Monad (guard)
import Data.Char (isAlpha, isAlphaNum)
import Data.Functor (($>))
import Data.Maybe (listToMaybe, maybeToList)
import Data.Text (Text)
import ParserMonad
import Text.Parsec (State (..))
import Text.Parsec.Pos (updatePosChar)
import Text.Read.Lex (isSymbolChar)
-- | Identifier string surrounded with namespace, opening, and closing quotes/backticks.
data Identifier = Identifier !Namespace !Char String !Char
deriving (Show, Eq)
-- | The namespace qualification for an identifier.
data Namespace = Value | Type | None deriving (Eq, Ord, Enum, Show)
parseValid :: Parser Identifier
parseValid = do
state@State{stateInput = input, statePos = pos} <- Parsec.getParserState
case takeIdentifier input of
Nothing -> Parsec.parserFail "parseValid: Failed to match a valid identifier"
Just (namespace, op, ident, cl, input') ->
let posOp = updatePosChar pos op
posIdent = Text.foldl updatePosChar posOp ident
posCl = updatePosChar posIdent cl
newState = state{stateInput = input', statePos = posCl}
in Parsec.setParserState newState $> Identifier namespace op (Text.unpack ident) cl
{- | Try to parse a delimited identifier off the front of the given input.
This tries to match as many valid Haskell identifiers/operators as possible,
to the point of sometimes accepting invalid things (ex: keywords). Some
considerations:
- operators and identifiers can have module qualifications
- operators can be wrapped in parens (for prefix)
- identifiers can be wrapped in backticks (for infix)
- delimiters are backticks or regular ticks
- since regular ticks are also valid in identifiers, we opt for the
longest successful parse
This function should make /O(1)/ allocations
-}
takeIdentifier :: Text -> Maybe (Namespace, Char, Text, Char, Text)
takeIdentifier input = listToMaybe $ do
-- Optional namespace
let (namespace, input') = case Text.uncons input of
Just ('v', i) -> (Value, i)
Just ('t', i) -> (Type, i)
_ -> (None, input)
-- Opening tick
(op, input'') <- maybeToList (Text.uncons input')
guard (op == '\'' || op == '`')
-- Identifier/operator
(ident, input''') <- wrapped input''
-- Closing tick
(cl, input'''') <- maybeToList (Text.uncons input''')
guard (cl == '\'' || cl == '`')
pure (namespace, op, ident, cl, input'''')
where
-- \| Parse out a wrapped, possibly qualified, operator or identifier
wrapped t = do
(c, t') <- maybeToList (Text.uncons t)
-- Tuples
case c of
'('
| Just (c', _) <- Text.uncons t'
, c' == ',' || c' == ')' ->
do
let (commas, t'') = Text.span (== ',') t'
(')', t''') <- maybeToList (Text.uncons t'')
pure (Text.take (Text.length commas + 2) t, t''')
-- Parenthesized
'(' -> do
(n, t'') <- general False 0 [] t'
(')', t''') <- maybeToList (Text.uncons t'')
pure (Text.take (n + 2) t, t''')
-- Backticked
'`' -> do
(n, t'') <- general False 0 [] t'
('`', t''') <- maybeToList (Text.uncons t'')
pure (Text.take (n + 2) t, t''')
-- Unadorned
_ -> do
(n, t'') <- general False 0 [] t
pure (Text.take n t, t'')
-- \| Parse out a possibly qualified operator or identifier
general ::
Bool ->
-- \^ refuse inputs starting with operators
Int ->
-- \^ total characters \"consumed\" so far
[(Int, Text)] ->
-- \^ accumulated results
Text ->
-- \^ current input
[(Int, Text)]
-- \^ total characters parsed & what remains
general !identOnly !i acc t
-- Starts with an identifier (either just an identifier, or a module qual)
| Just (n, rest) <- identLike t =
if Text.null rest
then acc
else case Text.head rest of
'`' -> (n + i, rest) : acc
')' -> (n + i, rest) : acc
'.' -> general False (n + i + 1) acc (Text.tail rest)
'\'' ->
let (m, rest') = quotes rest
in general True (n + m + 1 + i) ((n + m + i, rest') : acc) (Text.tail rest')
_ -> acc
-- An operator
| Just (n, rest) <- optr t
, not identOnly =
(n + i, rest) : acc
-- Anything else
| otherwise =
acc
-- \| Parse an identifier off the front of the input
identLike t
| Text.null t = Nothing
| isAlpha (Text.head t) || '_' == Text.head t =
let !(idt, rest) = Text.span (\c -> isAlphaNum c || c == '_') t
!(octos, rest') = Text.span (== '#') rest
in Just (Text.length idt + Text.length octos, rest')
| otherwise = Nothing
-- \| Parse all but the last quote off the front of the input
-- PRECONDITION: T.head t `elem` ['\'', '`']
quotes :: Text -> (Int, Text)
quotes t =
let !n = Text.length (Text.takeWhile (`elem` ['\'', '`']) t) - 1
in (n, Text.drop n t)
-- \| Parse an operator off the front of the input
optr t =
let !(op, rest) = Text.span isSymbolChar t
in if Text.null op then Nothing else Just (Text.length op, rest)