forked from elland/haddock2
157 lines
5.2 KiB
Haskell
157 lines
5.2 KiB
Haskell
module Identifier (
|
|
Identifier (..),
|
|
Namespace (..),
|
|
parseValid,
|
|
) where
|
|
|
|
import Data.Text qualified as Text
|
|
import Text.Parsec qualified as Parsec
|
|
|
|
import Control.Monad (guard)
|
|
import Data.Char (isAlpha, isAlphaNum)
|
|
import Data.Functor (($>))
|
|
import Data.Maybe (listToMaybe, maybeToList)
|
|
import Data.Text (Text)
|
|
import ParserMonad
|
|
import Text.Parsec (State (..))
|
|
import Text.Parsec.Pos (updatePosChar)
|
|
import Text.Read.Lex (isSymbolChar)
|
|
|
|
-- | Identifier string surrounded with namespace, opening, and closing quotes/backticks.
|
|
data Identifier = Identifier !Namespace !Char String !Char
|
|
deriving (Show, Eq)
|
|
|
|
-- | The namespace qualification for an identifier.
|
|
data Namespace = Value | Type | None deriving (Eq, Ord, Enum, Show)
|
|
|
|
parseValid :: Parser Identifier
|
|
parseValid = do
|
|
state@State{stateInput = input, statePos = pos} <- Parsec.getParserState
|
|
|
|
case takeIdentifier input of
|
|
Nothing -> Parsec.parserFail "parseValid: Failed to match a valid identifier"
|
|
Just (namespace, op, ident, cl, input') ->
|
|
let posOp = updatePosChar pos op
|
|
posIdent = Text.foldl updatePosChar posOp ident
|
|
posCl = updatePosChar posIdent cl
|
|
newState = state{stateInput = input', statePos = posCl}
|
|
in Parsec.setParserState newState $> Identifier namespace op (Text.unpack ident) cl
|
|
|
|
{- | Try to parse a delimited identifier off the front of the given input.
|
|
|
|
This tries to match as many valid Haskell identifiers/operators as possible,
|
|
to the point of sometimes accepting invalid things (ex: keywords). Some
|
|
considerations:
|
|
|
|
- operators and identifiers can have module qualifications
|
|
- operators can be wrapped in parens (for prefix)
|
|
- identifiers can be wrapped in backticks (for infix)
|
|
- delimiters are backticks or regular ticks
|
|
- since regular ticks are also valid in identifiers, we opt for the
|
|
longest successful parse
|
|
|
|
This function should make /O(1)/ allocations
|
|
-}
|
|
takeIdentifier :: Text -> Maybe (Namespace, Char, Text, Char, Text)
|
|
takeIdentifier input = listToMaybe $ do
|
|
-- Optional namespace
|
|
let (namespace, input') = case Text.uncons input of
|
|
Just ('v', i) -> (Value, i)
|
|
Just ('t', i) -> (Type, i)
|
|
_ -> (None, input)
|
|
-- Opening tick
|
|
(op, input'') <- maybeToList (Text.uncons input')
|
|
guard (op == '\'' || op == '`')
|
|
|
|
-- Identifier/operator
|
|
(ident, input''') <- wrapped input''
|
|
|
|
-- Closing tick
|
|
(cl, input'''') <- maybeToList (Text.uncons input''')
|
|
guard (cl == '\'' || cl == '`')
|
|
|
|
pure (namespace, op, ident, cl, input'''')
|
|
where
|
|
-- \| Parse out a wrapped, possibly qualified, operator or identifier
|
|
wrapped t = do
|
|
(c, t') <- maybeToList (Text.uncons t)
|
|
-- Tuples
|
|
case c of
|
|
'('
|
|
| Just (c', _) <- Text.uncons t'
|
|
, c' == ',' || c' == ')' ->
|
|
do
|
|
let (commas, t'') = Text.span (== ',') t'
|
|
(')', t''') <- maybeToList (Text.uncons t'')
|
|
pure (Text.take (Text.length commas + 2) t, t''')
|
|
|
|
-- Parenthesized
|
|
'(' -> do
|
|
(n, t'') <- general False 0 [] t'
|
|
(')', t''') <- maybeToList (Text.uncons t'')
|
|
pure (Text.take (n + 2) t, t''')
|
|
|
|
-- Backticked
|
|
'`' -> do
|
|
(n, t'') <- general False 0 [] t'
|
|
('`', t''') <- maybeToList (Text.uncons t'')
|
|
pure (Text.take (n + 2) t, t''')
|
|
|
|
-- Unadorned
|
|
_ -> do
|
|
(n, t'') <- general False 0 [] t
|
|
pure (Text.take n t, t'')
|
|
|
|
-- \| Parse out a possibly qualified operator or identifier
|
|
general ::
|
|
Bool ->
|
|
-- \^ refuse inputs starting with operators
|
|
Int ->
|
|
-- \^ total characters \"consumed\" so far
|
|
[(Int, Text)] ->
|
|
-- \^ accumulated results
|
|
Text ->
|
|
-- \^ current input
|
|
[(Int, Text)]
|
|
-- \^ total characters parsed & what remains
|
|
general !identOnly !i acc t
|
|
-- Starts with an identifier (either just an identifier, or a module qual)
|
|
| Just (n, rest) <- identLike t =
|
|
if Text.null rest
|
|
then acc
|
|
else case Text.head rest of
|
|
'`' -> (n + i, rest) : acc
|
|
')' -> (n + i, rest) : acc
|
|
'.' -> general False (n + i + 1) acc (Text.tail rest)
|
|
'\'' ->
|
|
let (m, rest') = quotes rest
|
|
in general True (n + m + 1 + i) ((n + m + i, rest') : acc) (Text.tail rest')
|
|
_ -> acc
|
|
-- An operator
|
|
| Just (n, rest) <- optr t
|
|
, not identOnly =
|
|
(n + i, rest) : acc
|
|
-- Anything else
|
|
| otherwise =
|
|
acc
|
|
|
|
-- \| Parse an identifier off the front of the input
|
|
identLike t
|
|
| Text.null t = Nothing
|
|
| isAlpha (Text.head t) || '_' == Text.head t =
|
|
let !(idt, rest) = Text.span (\c -> isAlphaNum c || c == '_') t
|
|
!(octos, rest') = Text.span (== '#') rest
|
|
in Just (Text.length idt + Text.length octos, rest')
|
|
| otherwise = Nothing
|
|
|
|
-- \| Parse all but the last quote off the front of the input
|
|
-- PRECONDITION: T.head t `elem` ['\'', '`']
|
|
quotes :: Text -> (Int, Text)
|
|
quotes t =
|
|
let !n = Text.length (Text.takeWhile (`elem` ['\'', '`']) t) - 1
|
|
in (n, Text.drop n t)
|
|
|
|
-- \| Parse an operator off the front of the input
|
|
optr t =
|
|
let !(op, rest) = Text.span isSymbolChar t
|
|
in if Text.null op then Nothing else Just (Text.length op, rest)
|