forked from elland/haddock2
Init
This commit is contained in:
commit
c9f61c4e06
16 changed files with 1366 additions and 0 deletions
157
src/Identifier.hs
Normal file
157
src/Identifier.hs
Normal file
|
|
@ -0,0 +1,157 @@
|
|||
module Identifier (
|
||||
Identifier (..),
|
||||
Namespace (..),
|
||||
parseValid,
|
||||
) where
|
||||
|
||||
import Data.Text qualified as Text
|
||||
import Text.Parsec qualified as Parsec
|
||||
|
||||
import Control.Monad (guard)
|
||||
import Data.Char (isAlpha, isAlphaNum)
|
||||
import Data.Functor (($>))
|
||||
import Data.Maybe (listToMaybe, maybeToList)
|
||||
import Data.Text (Text)
|
||||
import ParserMonad
|
||||
import Text.Parsec (State (..))
|
||||
import Text.Parsec.Pos (updatePosChar)
|
||||
import Text.Read.Lex (isSymbolChar)
|
||||
|
||||
-- | Identifier string surrounded with namespace, opening, and closing quotes/backticks.
|
||||
data Identifier = Identifier !Namespace !Char String !Char
|
||||
deriving (Show, Eq)
|
||||
|
||||
-- | The namespace qualification for an identifier.
|
||||
data Namespace = Value | Type | None deriving (Eq, Ord, Enum, Show)
|
||||
|
||||
parseValid :: Parser Identifier
|
||||
parseValid = do
|
||||
state@State{stateInput = input, statePos = pos} <- Parsec.getParserState
|
||||
|
||||
case takeIdentifier input of
|
||||
Nothing -> Parsec.parserFail "parseValid: Failed to match a valid identifier"
|
||||
Just (namespace, op, ident, cl, input') ->
|
||||
let posOp = updatePosChar pos op
|
||||
posIdent = Text.foldl updatePosChar posOp ident
|
||||
posCl = updatePosChar posIdent cl
|
||||
newState = state{stateInput = input', statePos = posCl}
|
||||
in Parsec.setParserState newState $> Identifier namespace op (Text.unpack ident) cl
|
||||
|
||||
{- | Try to parse a delimited identifier off the front of the given input.
|
||||
|
||||
This tries to match as many valid Haskell identifiers/operators as possible,
|
||||
to the point of sometimes accepting invalid things (ex: keywords). Some
|
||||
considerations:
|
||||
|
||||
- operators and identifiers can have module qualifications
|
||||
- operators can be wrapped in parens (for prefix)
|
||||
- identifiers can be wrapped in backticks (for infix)
|
||||
- delimiters are backticks or regular ticks
|
||||
- since regular ticks are also valid in identifiers, we opt for the
|
||||
longest successful parse
|
||||
|
||||
This function should make /O(1)/ allocations
|
||||
-}
|
||||
takeIdentifier :: Text -> Maybe (Namespace, Char, Text, Char, Text)
|
||||
takeIdentifier input = listToMaybe $ do
|
||||
-- Optional namespace
|
||||
let (namespace, input') = case Text.uncons input of
|
||||
Just ('v', i) -> (Value, i)
|
||||
Just ('t', i) -> (Type, i)
|
||||
_ -> (None, input)
|
||||
-- Opening tick
|
||||
(op, input'') <- maybeToList (Text.uncons input')
|
||||
guard (op == '\'' || op == '`')
|
||||
|
||||
-- Identifier/operator
|
||||
(ident, input''') <- wrapped input''
|
||||
|
||||
-- Closing tick
|
||||
(cl, input'''') <- maybeToList (Text.uncons input''')
|
||||
guard (cl == '\'' || cl == '`')
|
||||
|
||||
pure (namespace, op, ident, cl, input'''')
|
||||
where
|
||||
-- \| Parse out a wrapped, possibly qualified, operator or identifier
|
||||
wrapped t = do
|
||||
(c, t') <- maybeToList (Text.uncons t)
|
||||
-- Tuples
|
||||
case c of
|
||||
'('
|
||||
| Just (c', _) <- Text.uncons t'
|
||||
, c' == ',' || c' == ')' ->
|
||||
do
|
||||
let (commas, t'') = Text.span (== ',') t'
|
||||
(')', t''') <- maybeToList (Text.uncons t'')
|
||||
pure (Text.take (Text.length commas + 2) t, t''')
|
||||
|
||||
-- Parenthesized
|
||||
'(' -> do
|
||||
(n, t'') <- general False 0 [] t'
|
||||
(')', t''') <- maybeToList (Text.uncons t'')
|
||||
pure (Text.take (n + 2) t, t''')
|
||||
|
||||
-- Backticked
|
||||
'`' -> do
|
||||
(n, t'') <- general False 0 [] t'
|
||||
('`', t''') <- maybeToList (Text.uncons t'')
|
||||
pure (Text.take (n + 2) t, t''')
|
||||
|
||||
-- Unadorned
|
||||
_ -> do
|
||||
(n, t'') <- general False 0 [] t
|
||||
pure (Text.take n t, t'')
|
||||
|
||||
-- \| Parse out a possibly qualified operator or identifier
|
||||
general ::
|
||||
Bool ->
|
||||
-- \^ refuse inputs starting with operators
|
||||
Int ->
|
||||
-- \^ total characters \"consumed\" so far
|
||||
[(Int, Text)] ->
|
||||
-- \^ accumulated results
|
||||
Text ->
|
||||
-- \^ current input
|
||||
[(Int, Text)]
|
||||
-- \^ total characters parsed & what remains
|
||||
general !identOnly !i acc t
|
||||
-- Starts with an identifier (either just an identifier, or a module qual)
|
||||
| Just (n, rest) <- identLike t =
|
||||
if Text.null rest
|
||||
then acc
|
||||
else case Text.head rest of
|
||||
'`' -> (n + i, rest) : acc
|
||||
')' -> (n + i, rest) : acc
|
||||
'.' -> general False (n + i + 1) acc (Text.tail rest)
|
||||
'\'' ->
|
||||
let (m, rest') = quotes rest
|
||||
in general True (n + m + 1 + i) ((n + m + i, rest') : acc) (Text.tail rest')
|
||||
_ -> acc
|
||||
-- An operator
|
||||
| Just (n, rest) <- optr t
|
||||
, not identOnly =
|
||||
(n + i, rest) : acc
|
||||
-- Anything else
|
||||
| otherwise =
|
||||
acc
|
||||
|
||||
-- \| Parse an identifier off the front of the input
|
||||
identLike t
|
||||
| Text.null t = Nothing
|
||||
| isAlpha (Text.head t) || '_' == Text.head t =
|
||||
let !(idt, rest) = Text.span (\c -> isAlphaNum c || c == '_') t
|
||||
!(octos, rest') = Text.span (== '#') rest
|
||||
in Just (Text.length idt + Text.length octos, rest')
|
||||
| otherwise = Nothing
|
||||
|
||||
-- \| Parse all but the last quote off the front of the input
|
||||
-- PRECONDITION: T.head t `elem` ['\'', '`']
|
||||
quotes :: Text -> (Int, Text)
|
||||
quotes t =
|
||||
let !n = Text.length (Text.takeWhile (`elem` ['\'', '`']) t) - 1
|
||||
in (n, Text.drop n t)
|
||||
|
||||
-- \| Parse an operator off the front of the input
|
||||
optr t =
|
||||
let !(op, rest) = Text.span isSymbolChar t
|
||||
in if Text.null op then Nothing else Just (Text.length op, rest)
|
||||
190
src/Lexer.hs
Normal file
190
src/Lexer.hs
Normal file
|
|
@ -0,0 +1,190 @@
|
|||
{-# LANGUAGE OverloadedStrings #-}
|
||||
|
||||
module Lexer (
|
||||
Token (..),
|
||||
lexer,
|
||||
emphasis,
|
||||
) where
|
||||
|
||||
import Control.Monad (mfilter)
|
||||
import Data.Char (isAlphaNum, isPrint)
|
||||
import Data.Functor (($>))
|
||||
import Data.Text (Text)
|
||||
import Data.Text qualified as Text
|
||||
import GHC.Stack (HasCallStack)
|
||||
import ParserMonad (Parser, initialParserState)
|
||||
import Text.Parsec
|
||||
import Text.Parsec qualified as Parsec
|
||||
import Text.Parsec.Pos (updatePosChar)
|
||||
|
||||
type LocatedToken = (SourcePos, Token)
|
||||
|
||||
type Lexer = Parser [LocatedToken]
|
||||
|
||||
data Token
|
||||
= Token Text
|
||||
| Anchor
|
||||
| AngleOpen
|
||||
| AngleClose
|
||||
| BoldOpen
|
||||
| BoldClose
|
||||
| BracketOpen
|
||||
| BracketClose
|
||||
| EmphasisOpen
|
||||
| EmphasisClose
|
||||
| MonospaceOpen
|
||||
| MonospaceClose
|
||||
| Newline
|
||||
| ParenOpen
|
||||
| ParenClose
|
||||
| QuoteOpen
|
||||
| QuoteClose
|
||||
| Space
|
||||
| EOF
|
||||
deriving (Eq, Show)
|
||||
|
||||
lexer :: String -> Either ParseError [LocatedToken]
|
||||
lexer = Parsec.runParser lexText initialParserState "input" . Text.pack
|
||||
|
||||
lexText :: (HasCallStack) => Parser [LocatedToken]
|
||||
lexText = go
|
||||
where
|
||||
go = do
|
||||
Parsec.optionMaybe Parsec.eof >>= \case
|
||||
Just _ -> pure []
|
||||
Nothing -> do
|
||||
toks <-
|
||||
choice
|
||||
[ newlineToken
|
||||
, spaceToken
|
||||
, textElement
|
||||
, identifier
|
||||
, other
|
||||
]
|
||||
rest <- go
|
||||
pure (toks <> rest)
|
||||
|
||||
match :: Parser a -> Parser (Text, a)
|
||||
match p = do
|
||||
input <- getInput
|
||||
result <- p
|
||||
input' <- getInput
|
||||
let !consumed = Text.take (Text.length input - Text.length input') input
|
||||
pure (consumed, result)
|
||||
|
||||
-- Tokens
|
||||
|
||||
textElement :: Parser [LocatedToken]
|
||||
textElement =
|
||||
choice $
|
||||
Parsec.try
|
||||
<$> [ emphasis
|
||||
, bold
|
||||
, monospace
|
||||
, parens
|
||||
, brackets
|
||||
, angles
|
||||
]
|
||||
|
||||
delimited :: String -> String -> Token -> Token -> Parser [LocatedToken]
|
||||
delimited c1 c2 ot ct = do
|
||||
pos <- getPosition
|
||||
(_, content) <- match $ between op cl any'
|
||||
|
||||
innerToks <- case lexer $ Text.unpack content of
|
||||
Left _ -> do
|
||||
pos' <- getPosition
|
||||
pure $ [(pos', Token content)]
|
||||
Right toks -> pure toks
|
||||
|
||||
let openTok :: LocatedToken = (pos, ot)
|
||||
closeTok :: LocatedToken = (pos, ct)
|
||||
|
||||
pure $ openTok : innerToks <> [closeTok]
|
||||
where
|
||||
op = string c1
|
||||
cl = string c2
|
||||
any' = Text.pack <$> manyTill anyChar (lookAhead cl)
|
||||
|
||||
delimited' :: String -> Token -> Token -> Parser [LocatedToken]
|
||||
delimited' s t1 t2 = delimited s s t1 t2
|
||||
|
||||
emphasis :: Lexer
|
||||
emphasis = delimited' "/" EmphasisOpen EmphasisClose
|
||||
|
||||
bold :: Lexer
|
||||
bold = delimited' "__" BoldOpen BoldClose
|
||||
|
||||
monospace :: Lexer
|
||||
monospace = delimited' "@" MonospaceOpen MonospaceClose
|
||||
|
||||
parens :: Parser [LocatedToken]
|
||||
parens = delimited "(" ")" ParenOpen ParenClose
|
||||
|
||||
brackets :: Lexer
|
||||
brackets = delimited "[" "]" ParenOpen ParenClose
|
||||
|
||||
angles :: Parser [LocatedToken]
|
||||
angles = delimited "<" ">" AngleOpen AngleClose
|
||||
|
||||
other :: Lexer
|
||||
other = do
|
||||
pos <- getPosition
|
||||
c <- takeWhile1_ isPrint
|
||||
pure . pure $ (pos, Token c)
|
||||
|
||||
spaceToken :: Lexer
|
||||
spaceToken = do
|
||||
pos <- getPosition
|
||||
_ <- many1 (char ' ')
|
||||
pure . pure $ (pos, Space)
|
||||
|
||||
newlineToken :: Lexer
|
||||
newlineToken = do
|
||||
pos <- getPosition
|
||||
_ <- newline
|
||||
pure . pure $ (pos, Newline)
|
||||
|
||||
identifier :: Lexer
|
||||
identifier = do
|
||||
pos <- getPosition
|
||||
txt <- takeWhile1_ isAlphaNum
|
||||
pure . pure $ (pos, Token txt)
|
||||
|
||||
-------
|
||||
-- Helpers
|
||||
-------
|
||||
|
||||
-- | Like `takeWhile`, but unconditionally take escaped characters.
|
||||
takeWhile_ :: (Char -> Bool) -> Parser Text
|
||||
takeWhile_ p = scan p_ False
|
||||
where
|
||||
p_ escaped c
|
||||
| escaped = Just False
|
||||
| not $ p c = Nothing
|
||||
| otherwise = Just (c == '\\')
|
||||
|
||||
-- | Like 'takeWhile1', but unconditionally take escaped characters.
|
||||
takeWhile1_ :: (Char -> Bool) -> Parser Text
|
||||
takeWhile1_ = mfilter (not . Text.null) . takeWhile_
|
||||
|
||||
{- | Scan the input text, accumulating characters as long as the scanning
|
||||
function returns true.
|
||||
-}
|
||||
scan ::
|
||||
-- | scan function
|
||||
(state -> Char -> Maybe state) ->
|
||||
-- | initial state
|
||||
state ->
|
||||
Parser Text
|
||||
scan f initState = do
|
||||
parserState@State{stateInput = input, statePos = pos} <- Parsec.getParserState
|
||||
(remaining, finalPos, ct) <- go input initState pos 0
|
||||
let newState = parserState{stateInput = remaining, statePos = finalPos}
|
||||
Parsec.setParserState newState $> Text.take ct input
|
||||
where
|
||||
go !input' !st !posAccum !count' = case Text.uncons input' of
|
||||
Nothing -> pure (input', posAccum, count')
|
||||
Just (char', input'') -> case f st char' of
|
||||
Nothing -> pure (input', posAccum, count')
|
||||
Just st' -> go input'' st' (updatePosChar posAccum char') (count' + 1)
|
||||
228
src/Parser.hs
Normal file
228
src/Parser.hs
Normal file
|
|
@ -0,0 +1,228 @@
|
|||
{-# LANGUAGE OverloadedStrings #-}
|
||||
|
||||
module Parser (
|
||||
parse,
|
||||
parseText,
|
||||
) where
|
||||
|
||||
import Identifier
|
||||
import ParserMonad
|
||||
import Types
|
||||
|
||||
import Control.Applicative
|
||||
import Control.Monad
|
||||
import Data.Char qualified as Char
|
||||
import Data.Functor (($>))
|
||||
import Data.List (intercalate)
|
||||
import Data.Text (Text)
|
||||
import Data.Text qualified as Text
|
||||
import Parser.Util
|
||||
import Text.Parsec qualified as Parsec
|
||||
|
||||
parse :: Parser a -> Text -> (ParserState, a)
|
||||
parse parser = either err id . parse' (parser <* Parsec.eof)
|
||||
where
|
||||
err = error . ("Haddock.Parser.parse: " ++)
|
||||
|
||||
--------------------
|
||||
-- Markup
|
||||
--------------------
|
||||
|
||||
{- | Skips a single special character and treats it as a plain string.
|
||||
This is done to skip over any special characters belonging to other
|
||||
elements but which were not deemed meaningful at their positions.
|
||||
-}
|
||||
skipSpecialChar :: Parser (DocMarkup mod a)
|
||||
skipSpecialChar = DocString . pure <$> Parsec.oneOf specialChar
|
||||
|
||||
{- | Plain, regular parser for text. Called as one of the last parsers
|
||||
to ensure that we have already given a chance to more meaningful parsers
|
||||
before capturing their characters.
|
||||
-}
|
||||
string' :: Parser (DocMarkup mod a)
|
||||
string' =
|
||||
DocString
|
||||
-- After the first character, stop for @\(@ or @\[@ math starters. (The
|
||||
-- first character won't start a valid math string because this parser
|
||||
-- should follow math parsers. But this parser is expected to accept at
|
||||
-- least one character from all inputs that don't start with special
|
||||
-- characters, so the first character parser can't have the @"(["@
|
||||
-- restriction.)
|
||||
<$> ((:) <$> rawOrEscChar "" <*> many (rawOrEscChar "(["))
|
||||
where
|
||||
-- \| Parse a single logical character, either raw or escaped. Don't accept
|
||||
-- escaped characters from the argument string.
|
||||
rawOrEscChar :: [Char] -> Parser Char
|
||||
rawOrEscChar restrictedEscapes =
|
||||
Parsec.try $
|
||||
Parsec.noneOf specialChar >>= \case
|
||||
-- Handle backslashes:
|
||||
-- - Fail on forbidden escape characters.
|
||||
-- - Non-forbidden characters: simply unescape, e.g. parse "\b" as 'b',
|
||||
-- - Trailing backslash: treat it as a raw backslash, not an escape
|
||||
-- sequence. (This is the logic that this parser followed when this
|
||||
-- comment was written; it is not necessarily intentional but now I
|
||||
-- don't want to break anything relying on it.)
|
||||
'\\' -> Parsec.noneOf restrictedEscapes <|> Parsec.eof $> '\\'
|
||||
c -> pure c
|
||||
|
||||
{- | Emphasis parser.
|
||||
|
||||
>>> parseString "/Hello world/"
|
||||
DocEmphasis (DocString "Hello world")
|
||||
-}
|
||||
emphasis :: Parser (DocMarkup mod Identifier)
|
||||
emphasis =
|
||||
DocEmphasis . parseAll
|
||||
<$> disallowNewline ("/" *> takeWhile1_ (/= '/') <* "/")
|
||||
|
||||
{- | Bold parser.
|
||||
|
||||
>>> parseString "__Hello world__"
|
||||
DocBold (DocString "Hello world")
|
||||
-}
|
||||
bold :: Parser (DocMarkup mod Identifier)
|
||||
bold = DocBold . parseAll <$> disallowNewline ("__" *> takeUntil "__")
|
||||
|
||||
{- | Monospaced strings.
|
||||
|
||||
>>> parseString "@cruel@"
|
||||
DocMonospaced (DocString "cruel")
|
||||
-}
|
||||
monospace :: Parser (DocMarkup mod Identifier)
|
||||
monospace =
|
||||
DocMonospace . parseAll
|
||||
<$> ("@" *> takeWhile1_ (/= '@') <* "@")
|
||||
|
||||
{- | Text anchors to allow for jumping around the generated documentation.
|
||||
|
||||
>>> parseString "#Hello world#"
|
||||
DocAName "Hello world"
|
||||
-}
|
||||
anchor :: Parser (DocMarkup mod a)
|
||||
anchor =
|
||||
DocAnchor . Text.unpack
|
||||
<$> ("#" *> takeWhile1_ (\x -> x /= '#' && not (Char.isSpace x)) <* "#")
|
||||
|
||||
-- | Parses identifiers with help of 'parseValid'.
|
||||
identifier :: Parser (DocMarkup mod Identifier)
|
||||
identifier = DocIdentifier <$> parseValid
|
||||
|
||||
{- | Module names.
|
||||
|
||||
Note that we allow '#' and '\' to support anchors (old style anchors are of
|
||||
the form "SomeModule\#anchor").
|
||||
-}
|
||||
moduleName :: Parser (DocMarkup mod a)
|
||||
moduleName = DocModule . flip ModuleLink Nothing <$> ("\"" *> moduleNameString <* "\"")
|
||||
|
||||
-- | A module name, optionally with an anchor
|
||||
moduleNameString :: Parser String
|
||||
moduleNameString = moduleId `maybeFollowedBy` anchor_
|
||||
where
|
||||
moduleId = intercalate "." <$> conid `Parsec.sepBy1` "."
|
||||
anchor_ =
|
||||
(++)
|
||||
<$> (Parsec.string "#" <|> Parsec.string "\\#")
|
||||
<*> many (Parsec.satisfy (\c -> c /= '"' && not (Char.isSpace c)))
|
||||
|
||||
maybeFollowedBy pre suf = (\x -> maybe x (x ++)) <$> pre <*> optional suf
|
||||
|
||||
conid =
|
||||
(:)
|
||||
<$> Parsec.satisfy (\c -> Char.isAlpha c && Char.isUpper c)
|
||||
<*> many conChar
|
||||
|
||||
conChar = Parsec.alphaNum <|> Parsec.char '_'
|
||||
|
||||
------------------------
|
||||
-- Markup components
|
||||
------------------------
|
||||
|
||||
{- | List of characters that we use to delimit any special markup.
|
||||
Once we have checked for any of these and tried to parse the
|
||||
relevant markup, we can assume they are used as regular text.
|
||||
-}
|
||||
specialChar :: [Char]
|
||||
specialChar = "_/<@\"&'`#[ "
|
||||
|
||||
------------------------
|
||||
-- Helpers
|
||||
------------------------
|
||||
|
||||
parse' :: Parser a -> Text -> Either String (ParserState, a)
|
||||
parse' parser t =
|
||||
let parser' = (,) <$> parser <*> Parsec.getState
|
||||
in case Parsec.runParser parser' initialParserState "<haddock>" t of
|
||||
Left e -> Left (show e)
|
||||
Right (x, s) -> Right (s, x)
|
||||
|
||||
docConcat :: [DocMarkup mod id] -> DocMarkup mod id
|
||||
docConcat = foldr docAppend DocEmpty
|
||||
where
|
||||
-- Prevent doc append from becoming too nested
|
||||
docAppend (DocDefinitionList ds1) (DocDefinitionList ds2) = DocDefinitionList (ds1 <> ds2)
|
||||
docAppend (DocDefinitionList ds1) (DocAppend (DocDefinitionList ds2) d) = DocAppend (DocDefinitionList (ds1 <> ds2)) d
|
||||
docAppend (DocOrderedList ds1) (DocOrderedList ds2) = DocOrderedList (ds1 <> ds2)
|
||||
docAppend (DocOrderedList ds1) (DocAppend (DocOrderedList ds2) d) = DocAppend (DocOrderedList (ds1 <> ds2)) d
|
||||
docAppend (DocUnorderedList ds1) (DocUnorderedList ds2) = DocUnorderedList (ds1 <> ds2)
|
||||
docAppend (DocUnorderedList ds1) (DocAppend (DocUnorderedList ds2) d) = DocAppend (DocUnorderedList (ds1 <> ds2)) d
|
||||
docAppend DocEmpty d = d
|
||||
docAppend d DocEmpty = d
|
||||
docAppend (DocString s1) (DocString s2) = DocString (s1 <> s2)
|
||||
docAppend (DocAppend d (DocString s1)) (DocString s2) = DocAppend d (DocString (s1 <> s2))
|
||||
docAppend (DocString s1) (DocAppend (DocString s2) d) = DocAppend (DocString (s1 <> s2)) d
|
||||
docAppend d1 d2 = DocAppend d1 d2
|
||||
|
||||
{- | Parse a text paragraph. Actually just a wrapper over 'parseAll' which
|
||||
drops leading whitespace.
|
||||
-}
|
||||
parseText :: Text -> DocMarkup mod Identifier
|
||||
parseText = parseAll . Text.dropWhile Char.isSpace . Text.filter (/= '\r')
|
||||
|
||||
parseAll :: Text -> DocMarkup mod Identifier
|
||||
parseAll = snd . parse myParser
|
||||
where
|
||||
-- docConcat
|
||||
-- <$> many
|
||||
-- ( choice'
|
||||
-- [ monospace
|
||||
-- , anchor
|
||||
-- , identifier
|
||||
-- , moduleName
|
||||
-- , picture
|
||||
-- , mathDisplay
|
||||
-- , mathInline
|
||||
-- , markdownImage
|
||||
-- , markdownLink
|
||||
-- , hyperlink
|
||||
-- , bold
|
||||
-- , emphasis
|
||||
-- , encodedChar
|
||||
-- , string'
|
||||
-- , skipSpecialChar
|
||||
-- ]
|
||||
-- )
|
||||
myParser :: Parser (DocMarkup mod Identifier)
|
||||
myParser =
|
||||
docConcat
|
||||
<$> many
|
||||
( choice'
|
||||
[ monospace
|
||||
, anchor
|
||||
, identifier
|
||||
, moduleName
|
||||
, bold
|
||||
, emphasis
|
||||
, string'
|
||||
, skipSpecialChar
|
||||
]
|
||||
)
|
||||
|
||||
choice' :: [Parser a] -> Parser a
|
||||
choice' [] = empty
|
||||
choice' [p] = p
|
||||
choice' (p : ps) = Parsec.try p <|> choice' ps
|
||||
|
||||
disallowNewline :: Parser Text -> Parser Text
|
||||
disallowNewline = mfilter (Text.all (/= '\n'))
|
||||
65
src/Parser/Util.hs
Normal file
65
src/Parser/Util.hs
Normal file
|
|
@ -0,0 +1,65 @@
|
|||
module Parser.Util where
|
||||
|
||||
import Control.Monad (mfilter)
|
||||
import Data.Functor (($>))
|
||||
import Data.Text (Text)
|
||||
import Data.Text qualified as Text
|
||||
import ParserMonad (Parser)
|
||||
import Text.Parsec (State (..))
|
||||
import Text.Parsec qualified as Parsec
|
||||
import Text.Parsec.Pos (updatePosChar)
|
||||
|
||||
{- | Consume characters from the input up to and including the given pattern.
|
||||
Return everything consumed except for the end pattern itself.
|
||||
-}
|
||||
takeUntil :: Text -> Parser Text
|
||||
takeUntil end_ = Text.dropEnd (Text.length end_) <$> requireEnd (scan p (False, end)) >>= gotSome
|
||||
where
|
||||
end = Text.unpack end_
|
||||
|
||||
p :: (Bool, String) -> Char -> Maybe (Bool, String)
|
||||
p acc c = case acc of
|
||||
(True, _) -> Just (False, end)
|
||||
(_, []) -> Nothing
|
||||
(_, x : xs) | x == c -> Just (False, xs)
|
||||
_ -> Just (c == '\\', end)
|
||||
|
||||
requireEnd = mfilter (Text.isSuffixOf end_)
|
||||
|
||||
gotSome xs
|
||||
| Text.null xs = fail "didn't get any content"
|
||||
| otherwise = return xs
|
||||
|
||||
-- | Like `takeWhile`, but unconditionally take escaped characters.
|
||||
takeWhile_ :: (Char -> Bool) -> Parser Text
|
||||
takeWhile_ p = scan p_ False
|
||||
where
|
||||
p_ escaped c
|
||||
| escaped = Just False
|
||||
| not $ p c = Nothing
|
||||
| otherwise = Just (c == '\\')
|
||||
|
||||
-- | Like 'takeWhile1', but unconditionally take escaped characters.
|
||||
takeWhile1_ :: (Char -> Bool) -> Parser Text
|
||||
takeWhile1_ = mfilter (not . Text.null) . takeWhile_
|
||||
|
||||
{- | Scan the input text, accumulating characters as long as the scanning
|
||||
function returns true.
|
||||
-}
|
||||
scan ::
|
||||
-- | scan function
|
||||
(state -> Char -> Maybe state) ->
|
||||
-- | initial state
|
||||
state ->
|
||||
Parser Text
|
||||
scan f initState = do
|
||||
parserState@State{stateInput = input, statePos = pos} <- Parsec.getParserState
|
||||
(remaining, finalPos, ct) <- go input initState pos 0
|
||||
let newState = parserState{stateInput = remaining, statePos = finalPos}
|
||||
Parsec.setParserState newState $> Text.take ct input
|
||||
where
|
||||
go !input' !st !posAccum !count' = case Text.uncons input' of
|
||||
Nothing -> pure (input', posAccum, count')
|
||||
Just (char', input'') -> case f st char' of
|
||||
Nothing -> pure (input', posAccum, count')
|
||||
Just st' -> go input'' st' (updatePosChar posAccum char') (count' + 1)
|
||||
26
src/ParserMonad.hs
Normal file
26
src/ParserMonad.hs
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
module ParserMonad where
|
||||
|
||||
import Data.String
|
||||
import Data.Text (Text)
|
||||
import Data.Text qualified as Text
|
||||
import Text.Parsec (Parsec)
|
||||
import Text.Parsec qualified as Parsec
|
||||
|
||||
import Types
|
||||
|
||||
type Parser = Parsec Text ParserState
|
||||
|
||||
instance (a ~ Text) => IsString (Parser a) where
|
||||
fromString = fmap Text.pack . Parsec.string
|
||||
|
||||
{- | The only bit of information we really care about trudging along with us
|
||||
through parsing is the version attached to a @\@since@ annotation - if
|
||||
the doc even contained one.
|
||||
-}
|
||||
newtype ParserState = ParserState
|
||||
{ since :: Maybe Since
|
||||
}
|
||||
deriving (Eq, Show)
|
||||
|
||||
initialParserState :: ParserState
|
||||
initialParserState = ParserState Nothing
|
||||
196
src/Types.hs
Normal file
196
src/Types.hs
Normal file
|
|
@ -0,0 +1,196 @@
|
|||
module Types (
|
||||
DocMarkup (..),
|
||||
Document (..),
|
||||
Meta (..),
|
||||
ModuleLink (..),
|
||||
Package,
|
||||
Since (..),
|
||||
Version,
|
||||
)
|
||||
where
|
||||
|
||||
newtype Document = Document
|
||||
{ meta :: Meta
|
||||
}
|
||||
deriving (Eq, Show)
|
||||
|
||||
newtype Meta = Meta
|
||||
{ since :: Maybe Since
|
||||
}
|
||||
deriving (Eq, Show)
|
||||
|
||||
data Since = Since
|
||||
{ package :: Maybe Package
|
||||
-- ^ optional package qualification
|
||||
, version :: Version
|
||||
}
|
||||
deriving (Eq, Show)
|
||||
|
||||
-- Could have a better type?
|
||||
type Version = [Int]
|
||||
type Package = String
|
||||
|
||||
data DocMarkup mod id
|
||||
= DocEmpty
|
||||
| -- | This is not represented in the markup language, this is for internal use
|
||||
DocAppend (DocMarkup mod id) (DocMarkup mod id)
|
||||
| -- | Any text that doesn't match any rules is a bare string
|
||||
DocString String
|
||||
| -- | Paragraphs are demarcated by blank lines
|
||||
DocParagraph (DocMarkup mod id)
|
||||
| -- | A haskell identifier
|
||||
DocIdentifier id
|
||||
| -- | A qualified identifier that couldn't be resolved.
|
||||
DocIdentifierUnchecked
|
||||
| -- | A link to a module, might include a label
|
||||
DocModule (ModuleLink (DocMarkup mod id))
|
||||
| -- | Emphasis /italics/
|
||||
DocEmphasis (DocMarkup mod id)
|
||||
| -- | Monospaced @source code@
|
||||
DocMonospace (DocMarkup mod id)
|
||||
| -- | Bold __bold text__
|
||||
DocBold (DocMarkup mod id)
|
||||
| {- | Unordered lists
|
||||
* this
|
||||
or
|
||||
- this
|
||||
-}
|
||||
DocUnorderedList [DocMarkup mod id]
|
||||
| {- | Ordered lists
|
||||
1. this
|
||||
or
|
||||
(1) this
|
||||
-}
|
||||
DocOrderedList [(Int, DocMarkup mod id)]
|
||||
| {- | Definition lists
|
||||
[term] a term
|
||||
[another term] another definition
|
||||
-}
|
||||
DocDefinitionList [(DocMarkup mod id, DocMarkup mod id)]
|
||||
| {- | Code blocks
|
||||
@
|
||||
a code block in here
|
||||
with multiple lines
|
||||
@
|
||||
|
||||
Or with bird tracks:
|
||||
> some code
|
||||
> goes here
|
||||
-}
|
||||
DocCodeBlock (DocMarkup mod id)
|
||||
| {- | Hyperlinks
|
||||
__marked__:
|
||||
<http://example.com>
|
||||
<http://example.com label text>
|
||||
__Auto-detected URLs__:
|
||||
http://example.com
|
||||
https://example.com
|
||||
ftp://example.com
|
||||
__Markdown style__
|
||||
[link text](http://example.com)
|
||||
[link text]("Module.Name")
|
||||
-}
|
||||
DocHyperlink (Hyperlink (DocMarkup mod id))
|
||||
| {- | Pictures
|
||||
<<image.png>>
|
||||
<<image.png title text>>
|
||||
|
||||
__Markdown Images__
|
||||
|
||||

|
||||
-}
|
||||
DocPicture Picture
|
||||
| {- | Inline math expressions
|
||||
\(mathematical expression\)
|
||||
-}
|
||||
DocMathInline String
|
||||
| {- | Math multiline display
|
||||
\[
|
||||
mathematical expression
|
||||
in multiple lines
|
||||
\]
|
||||
-}
|
||||
DocMathDisplay String
|
||||
| {- | Anchors, no spaces allowed
|
||||
#anchor-name#
|
||||
-}
|
||||
DocAnchor String
|
||||
| {- | Property descriptions
|
||||
prop> property description
|
||||
-}
|
||||
DocProperty String
|
||||
| {- | Examples
|
||||
>>> expression
|
||||
result line 1
|
||||
result line 2
|
||||
-}
|
||||
DocExamples [Example]
|
||||
| -- | Header
|
||||
DocHeader (Header (DocMarkup mod id))
|
||||
| -- Table
|
||||
DocTable (Table (DocMarkup mod id))
|
||||
deriving (Eq, Show)
|
||||
|
||||
instance Semigroup (DocMarkup mod id) where
|
||||
(<>) = DocAppend
|
||||
|
||||
instance Monoid (DocMarkup mod id) where
|
||||
mempty = DocEmpty
|
||||
mconcat = foldr (<>) mempty
|
||||
|
||||
data ModuleLink id = ModuleLink
|
||||
{ name :: String
|
||||
, label :: Maybe id
|
||||
}
|
||||
deriving (Eq, Show)
|
||||
|
||||
data Picture = Picture
|
||||
{ uri :: String
|
||||
, title :: Maybe String
|
||||
}
|
||||
deriving (Eq, Show)
|
||||
|
||||
data Hyperlink id = Hyperlink
|
||||
{ url :: String
|
||||
, label :: Maybe id
|
||||
}
|
||||
deriving (Eq, Show, Functor, Foldable, Traversable)
|
||||
|
||||
data TableCell id = TableCell
|
||||
{ col :: Int
|
||||
, row :: Int
|
||||
, content :: id
|
||||
}
|
||||
deriving (Eq, Show, Functor, Foldable, Traversable)
|
||||
|
||||
newtype TableRow id = TableRow
|
||||
{ rows :: [TableCell id]
|
||||
}
|
||||
deriving (Eq, Show, Functor, Foldable, Traversable)
|
||||
|
||||
data Table id = Table
|
||||
{ headerRows :: [TableRow id]
|
||||
, bodyRows :: [TableRow id]
|
||||
}
|
||||
deriving (Eq, Show, Functor, Foldable, Traversable)
|
||||
|
||||
data Example = Example
|
||||
{ exampleExpression :: String
|
||||
, exampleResult :: [String]
|
||||
}
|
||||
deriving (Eq, Show)
|
||||
|
||||
data Header id = Header
|
||||
{ level :: HeaderLevel
|
||||
, title :: id
|
||||
}
|
||||
deriving (Eq, Show)
|
||||
|
||||
data HeaderLevel
|
||||
= H1
|
||||
| H2
|
||||
| H3
|
||||
| H4
|
||||
| H5
|
||||
| H6
|
||||
deriving (Eq, Show, Bounded, Enum)
|
||||
Loading…
Add table
Add a link
Reference in a new issue