228 lines
7.3 KiB
Haskell
228 lines
7.3 KiB
Haskell
{-# LANGUAGE OverloadedStrings #-}
|
|
|
|
module Parser (
|
|
parse,
|
|
parseText,
|
|
) where
|
|
|
|
import Identifier
|
|
import ParserMonad
|
|
import Types
|
|
|
|
import Control.Applicative
|
|
import Control.Monad
|
|
import Data.Char qualified as Char
|
|
import Data.Functor (($>))
|
|
import Data.List (intercalate)
|
|
import Data.Text (Text)
|
|
import Data.Text qualified as Text
|
|
import Parser.Util
|
|
import Text.Parsec qualified as Parsec
|
|
|
|
parse :: Parser a -> Text -> (ParserState, a)
|
|
parse parser = either err id . parse' (parser <* Parsec.eof)
|
|
where
|
|
err = error . ("Haddock.Parser.parse: " ++)
|
|
|
|
--------------------
|
|
-- Markup
|
|
--------------------
|
|
|
|
{- | Skips a single special character and treats it as a plain string.
|
|
This is done to skip over any special characters belonging to other
|
|
elements but which were not deemed meaningful at their positions.
|
|
-}
|
|
skipSpecialChar :: Parser (DocMarkup mod a)
|
|
skipSpecialChar = DocString . pure <$> Parsec.oneOf specialChar
|
|
|
|
{- | Plain, regular parser for text. Called as one of the last parsers
|
|
to ensure that we have already given a chance to more meaningful parsers
|
|
before capturing their characters.
|
|
-}
|
|
string' :: Parser (DocMarkup mod a)
|
|
string' =
|
|
DocString
|
|
-- After the first character, stop for @\(@ or @\[@ math starters. (The
|
|
-- first character won't start a valid math string because this parser
|
|
-- should follow math parsers. But this parser is expected to accept at
|
|
-- least one character from all inputs that don't start with special
|
|
-- characters, so the first character parser can't have the @"(["@
|
|
-- restriction.)
|
|
<$> ((:) <$> rawOrEscChar "" <*> many (rawOrEscChar "(["))
|
|
where
|
|
-- \| Parse a single logical character, either raw or escaped. Don't accept
|
|
-- escaped characters from the argument string.
|
|
rawOrEscChar :: [Char] -> Parser Char
|
|
rawOrEscChar restrictedEscapes =
|
|
Parsec.try $
|
|
Parsec.noneOf specialChar >>= \case
|
|
-- Handle backslashes:
|
|
-- - Fail on forbidden escape characters.
|
|
-- - Non-forbidden characters: simply unescape, e.g. parse "\b" as 'b',
|
|
-- - Trailing backslash: treat it as a raw backslash, not an escape
|
|
-- sequence. (This is the logic that this parser followed when this
|
|
-- comment was written; it is not necessarily intentional but now I
|
|
-- don't want to break anything relying on it.)
|
|
'\\' -> Parsec.noneOf restrictedEscapes <|> Parsec.eof $> '\\'
|
|
c -> pure c
|
|
|
|
{- | Emphasis parser.
|
|
|
|
>>> parseString "/Hello world/"
|
|
DocEmphasis (DocString "Hello world")
|
|
-}
|
|
emphasis :: Parser (DocMarkup mod Identifier)
|
|
emphasis =
|
|
DocEmphasis . parseAll
|
|
<$> disallowNewline ("/" *> takeWhile1_ (/= '/') <* "/")
|
|
|
|
{- | Bold parser.
|
|
|
|
>>> parseString "__Hello world__"
|
|
DocBold (DocString "Hello world")
|
|
-}
|
|
bold :: Parser (DocMarkup mod Identifier)
|
|
bold = DocBold . parseAll <$> disallowNewline ("__" *> takeUntil "__")
|
|
|
|
{- | Monospaced strings.
|
|
|
|
>>> parseString "@cruel@"
|
|
DocMonospaced (DocString "cruel")
|
|
-}
|
|
monospace :: Parser (DocMarkup mod Identifier)
|
|
monospace =
|
|
DocMonospace . parseAll
|
|
<$> ("@" *> takeWhile1_ (/= '@') <* "@")
|
|
|
|
{- | Text anchors to allow for jumping around the generated documentation.
|
|
|
|
>>> parseString "#Hello world#"
|
|
DocAName "Hello world"
|
|
-}
|
|
anchor :: Parser (DocMarkup mod a)
|
|
anchor =
|
|
DocAnchor . Text.unpack
|
|
<$> ("#" *> takeWhile1_ (\x -> x /= '#' && not (Char.isSpace x)) <* "#")
|
|
|
|
-- | Parses identifiers with help of 'parseValid'.
|
|
identifier :: Parser (DocMarkup mod Identifier)
|
|
identifier = DocIdentifier <$> parseValid
|
|
|
|
{- | Module names.
|
|
|
|
Note that we allow '#' and '\' to support anchors (old style anchors are of
|
|
the form "SomeModule\#anchor").
|
|
-}
|
|
moduleName :: Parser (DocMarkup mod a)
|
|
moduleName = DocModule . flip ModuleLink Nothing <$> ("\"" *> moduleNameString <* "\"")
|
|
|
|
-- | A module name, optionally with an anchor
|
|
moduleNameString :: Parser String
|
|
moduleNameString = moduleId `maybeFollowedBy` anchor_
|
|
where
|
|
moduleId = intercalate "." <$> conid `Parsec.sepBy1` "."
|
|
anchor_ =
|
|
(++)
|
|
<$> (Parsec.string "#" <|> Parsec.string "\\#")
|
|
<*> many (Parsec.satisfy (\c -> c /= '"' && not (Char.isSpace c)))
|
|
|
|
maybeFollowedBy pre suf = (\x -> maybe x (x ++)) <$> pre <*> optional suf
|
|
|
|
conid =
|
|
(:)
|
|
<$> Parsec.satisfy (\c -> Char.isAlpha c && Char.isUpper c)
|
|
<*> many conChar
|
|
|
|
conChar = Parsec.alphaNum <|> Parsec.char '_'
|
|
|
|
------------------------
|
|
-- Markup components
|
|
------------------------
|
|
|
|
{- | List of characters that we use to delimit any special markup.
|
|
Once we have checked for any of these and tried to parse the
|
|
relevant markup, we can assume they are used as regular text.
|
|
-}
|
|
specialChar :: [Char]
|
|
specialChar = "_/<@\"&'`#[ "
|
|
|
|
------------------------
|
|
-- Helpers
|
|
------------------------
|
|
|
|
parse' :: Parser a -> Text -> Either String (ParserState, a)
|
|
parse' parser t =
|
|
let parser' = (,) <$> parser <*> Parsec.getState
|
|
in case Parsec.runParser parser' initialParserState "<haddock>" t of
|
|
Left e -> Left (show e)
|
|
Right (x, s) -> Right (s, x)
|
|
|
|
docConcat :: [DocMarkup mod id] -> DocMarkup mod id
|
|
docConcat = foldr docAppend DocEmpty
|
|
where
|
|
-- Prevent doc append from becoming too nested
|
|
docAppend (DocDefinitionList ds1) (DocDefinitionList ds2) = DocDefinitionList (ds1 <> ds2)
|
|
docAppend (DocDefinitionList ds1) (DocAppend (DocDefinitionList ds2) d) = DocAppend (DocDefinitionList (ds1 <> ds2)) d
|
|
docAppend (DocOrderedList ds1) (DocOrderedList ds2) = DocOrderedList (ds1 <> ds2)
|
|
docAppend (DocOrderedList ds1) (DocAppend (DocOrderedList ds2) d) = DocAppend (DocOrderedList (ds1 <> ds2)) d
|
|
docAppend (DocUnorderedList ds1) (DocUnorderedList ds2) = DocUnorderedList (ds1 <> ds2)
|
|
docAppend (DocUnorderedList ds1) (DocAppend (DocUnorderedList ds2) d) = DocAppend (DocUnorderedList (ds1 <> ds2)) d
|
|
docAppend DocEmpty d = d
|
|
docAppend d DocEmpty = d
|
|
docAppend (DocString s1) (DocString s2) = DocString (s1 <> s2)
|
|
docAppend (DocAppend d (DocString s1)) (DocString s2) = DocAppend d (DocString (s1 <> s2))
|
|
docAppend (DocString s1) (DocAppend (DocString s2) d) = DocAppend (DocString (s1 <> s2)) d
|
|
docAppend d1 d2 = DocAppend d1 d2
|
|
|
|
{- | Parse a text paragraph. Actually just a wrapper over 'parseAll' which
|
|
drops leading whitespace.
|
|
-}
|
|
parseText :: Text -> DocMarkup mod Identifier
|
|
parseText = parseAll . Text.dropWhile Char.isSpace . Text.filter (/= '\r')
|
|
|
|
parseAll :: Text -> DocMarkup mod Identifier
|
|
parseAll = snd . parse myParser
|
|
where
|
|
-- docConcat
|
|
-- <$> many
|
|
-- ( choice'
|
|
-- [ monospace
|
|
-- , anchor
|
|
-- , identifier
|
|
-- , moduleName
|
|
-- , picture
|
|
-- , mathDisplay
|
|
-- , mathInline
|
|
-- , markdownImage
|
|
-- , markdownLink
|
|
-- , hyperlink
|
|
-- , bold
|
|
-- , emphasis
|
|
-- , encodedChar
|
|
-- , string'
|
|
-- , skipSpecialChar
|
|
-- ]
|
|
-- )
|
|
myParser :: Parser (DocMarkup mod Identifier)
|
|
myParser =
|
|
docConcat
|
|
<$> many
|
|
( choice'
|
|
[ monospace
|
|
, anchor
|
|
, identifier
|
|
, moduleName
|
|
, bold
|
|
, emphasis
|
|
, string'
|
|
, skipSpecialChar
|
|
]
|
|
)
|
|
|
|
choice' :: [Parser a] -> Parser a
|
|
choice' [] = empty
|
|
choice' [p] = p
|
|
choice' (p : ps) = Parsec.try p <|> choice' ps
|
|
|
|
disallowNewline :: Parser Text -> Parser Text
|
|
disallowNewline = mfilter (Text.all (/= '\n'))
|