forked from elland/haddock2
Init
This commit is contained in:
commit
c9f61c4e06
16 changed files with 1366 additions and 0 deletions
157
src/Identifier.hs
Normal file
157
src/Identifier.hs
Normal file
|
|
@ -0,0 +1,157 @@
|
|||
module Identifier (
|
||||
Identifier (..),
|
||||
Namespace (..),
|
||||
parseValid,
|
||||
) where
|
||||
|
||||
import Data.Text qualified as Text
|
||||
import Text.Parsec qualified as Parsec
|
||||
|
||||
import Control.Monad (guard)
|
||||
import Data.Char (isAlpha, isAlphaNum)
|
||||
import Data.Functor (($>))
|
||||
import Data.Maybe (listToMaybe, maybeToList)
|
||||
import Data.Text (Text)
|
||||
import ParserMonad
|
||||
import Text.Parsec (State (..))
|
||||
import Text.Parsec.Pos (updatePosChar)
|
||||
import Text.Read.Lex (isSymbolChar)
|
||||
|
||||
-- | Identifier string surrounded with namespace, opening, and closing quotes/backticks.
|
||||
data Identifier = Identifier !Namespace !Char String !Char
|
||||
deriving (Show, Eq)
|
||||
|
||||
-- | The namespace qualification for an identifier.
|
||||
data Namespace = Value | Type | None deriving (Eq, Ord, Enum, Show)
|
||||
|
||||
parseValid :: Parser Identifier
|
||||
parseValid = do
|
||||
state@State{stateInput = input, statePos = pos} <- Parsec.getParserState
|
||||
|
||||
case takeIdentifier input of
|
||||
Nothing -> Parsec.parserFail "parseValid: Failed to match a valid identifier"
|
||||
Just (namespace, op, ident, cl, input') ->
|
||||
let posOp = updatePosChar pos op
|
||||
posIdent = Text.foldl updatePosChar posOp ident
|
||||
posCl = updatePosChar posIdent cl
|
||||
newState = state{stateInput = input', statePos = posCl}
|
||||
in Parsec.setParserState newState $> Identifier namespace op (Text.unpack ident) cl
|
||||
|
||||
{- | Try to parse a delimited identifier off the front of the given input.
|
||||
|
||||
This tries to match as many valid Haskell identifiers/operators as possible,
|
||||
to the point of sometimes accepting invalid things (ex: keywords). Some
|
||||
considerations:
|
||||
|
||||
- operators and identifiers can have module qualifications
|
||||
- operators can be wrapped in parens (for prefix)
|
||||
- identifiers can be wrapped in backticks (for infix)
|
||||
- delimiters are backticks or regular ticks
|
||||
- since regular ticks are also valid in identifiers, we opt for the
|
||||
longest successful parse
|
||||
|
||||
This function should make /O(1)/ allocations
|
||||
-}
|
||||
takeIdentifier :: Text -> Maybe (Namespace, Char, Text, Char, Text)
|
||||
takeIdentifier input = listToMaybe $ do
|
||||
-- Optional namespace
|
||||
let (namespace, input') = case Text.uncons input of
|
||||
Just ('v', i) -> (Value, i)
|
||||
Just ('t', i) -> (Type, i)
|
||||
_ -> (None, input)
|
||||
-- Opening tick
|
||||
(op, input'') <- maybeToList (Text.uncons input')
|
||||
guard (op == '\'' || op == '`')
|
||||
|
||||
-- Identifier/operator
|
||||
(ident, input''') <- wrapped input''
|
||||
|
||||
-- Closing tick
|
||||
(cl, input'''') <- maybeToList (Text.uncons input''')
|
||||
guard (cl == '\'' || cl == '`')
|
||||
|
||||
pure (namespace, op, ident, cl, input'''')
|
||||
where
|
||||
-- \| Parse out a wrapped, possibly qualified, operator or identifier
|
||||
wrapped t = do
|
||||
(c, t') <- maybeToList (Text.uncons t)
|
||||
-- Tuples
|
||||
case c of
|
||||
'('
|
||||
| Just (c', _) <- Text.uncons t'
|
||||
, c' == ',' || c' == ')' ->
|
||||
do
|
||||
let (commas, t'') = Text.span (== ',') t'
|
||||
(')', t''') <- maybeToList (Text.uncons t'')
|
||||
pure (Text.take (Text.length commas + 2) t, t''')
|
||||
|
||||
-- Parenthesized
|
||||
'(' -> do
|
||||
(n, t'') <- general False 0 [] t'
|
||||
(')', t''') <- maybeToList (Text.uncons t'')
|
||||
pure (Text.take (n + 2) t, t''')
|
||||
|
||||
-- Backticked
|
||||
'`' -> do
|
||||
(n, t'') <- general False 0 [] t'
|
||||
('`', t''') <- maybeToList (Text.uncons t'')
|
||||
pure (Text.take (n + 2) t, t''')
|
||||
|
||||
-- Unadorned
|
||||
_ -> do
|
||||
(n, t'') <- general False 0 [] t
|
||||
pure (Text.take n t, t'')
|
||||
|
||||
-- \| Parse out a possibly qualified operator or identifier
|
||||
general ::
|
||||
Bool ->
|
||||
-- \^ refuse inputs starting with operators
|
||||
Int ->
|
||||
-- \^ total characters \"consumed\" so far
|
||||
[(Int, Text)] ->
|
||||
-- \^ accumulated results
|
||||
Text ->
|
||||
-- \^ current input
|
||||
[(Int, Text)]
|
||||
-- \^ total characters parsed & what remains
|
||||
general !identOnly !i acc t
|
||||
-- Starts with an identifier (either just an identifier, or a module qual)
|
||||
| Just (n, rest) <- identLike t =
|
||||
if Text.null rest
|
||||
then acc
|
||||
else case Text.head rest of
|
||||
'`' -> (n + i, rest) : acc
|
||||
')' -> (n + i, rest) : acc
|
||||
'.' -> general False (n + i + 1) acc (Text.tail rest)
|
||||
'\'' ->
|
||||
let (m, rest') = quotes rest
|
||||
in general True (n + m + 1 + i) ((n + m + i, rest') : acc) (Text.tail rest')
|
||||
_ -> acc
|
||||
-- An operator
|
||||
| Just (n, rest) <- optr t
|
||||
, not identOnly =
|
||||
(n + i, rest) : acc
|
||||
-- Anything else
|
||||
| otherwise =
|
||||
acc
|
||||
|
||||
-- \| Parse an identifier off the front of the input
|
||||
identLike t
|
||||
| Text.null t = Nothing
|
||||
| isAlpha (Text.head t) || '_' == Text.head t =
|
||||
let !(idt, rest) = Text.span (\c -> isAlphaNum c || c == '_') t
|
||||
!(octos, rest') = Text.span (== '#') rest
|
||||
in Just (Text.length idt + Text.length octos, rest')
|
||||
| otherwise = Nothing
|
||||
|
||||
-- \| Parse all but the last quote off the front of the input
|
||||
-- PRECONDITION: T.head t `elem` ['\'', '`']
|
||||
quotes :: Text -> (Int, Text)
|
||||
quotes t =
|
||||
let !n = Text.length (Text.takeWhile (`elem` ['\'', '`']) t) - 1
|
||||
in (n, Text.drop n t)
|
||||
|
||||
-- \| Parse an operator off the front of the input
|
||||
optr t =
|
||||
let !(op, rest) = Text.span isSymbolChar t
|
||||
in if Text.null op then Nothing else Just (Text.length op, rest)
|
||||
Loading…
Add table
Add a link
Reference in a new issue