forked from elland/haddock2
Init
This commit is contained in:
commit
c9f61c4e06
16 changed files with 1366 additions and 0 deletions
228
src/Parser.hs
Normal file
228
src/Parser.hs
Normal file
|
|
@ -0,0 +1,228 @@
|
|||
{-# LANGUAGE OverloadedStrings #-}
|
||||
|
||||
module Parser (
|
||||
parse,
|
||||
parseText,
|
||||
) where
|
||||
|
||||
import Identifier
|
||||
import ParserMonad
|
||||
import Types
|
||||
|
||||
import Control.Applicative
|
||||
import Control.Monad
|
||||
import Data.Char qualified as Char
|
||||
import Data.Functor (($>))
|
||||
import Data.List (intercalate)
|
||||
import Data.Text (Text)
|
||||
import Data.Text qualified as Text
|
||||
import Parser.Util
|
||||
import Text.Parsec qualified as Parsec
|
||||
|
||||
parse :: Parser a -> Text -> (ParserState, a)
|
||||
parse parser = either err id . parse' (parser <* Parsec.eof)
|
||||
where
|
||||
err = error . ("Haddock.Parser.parse: " ++)
|
||||
|
||||
--------------------
|
||||
-- Markup
|
||||
--------------------
|
||||
|
||||
{- | Skips a single special character and treats it as a plain string.
|
||||
This is done to skip over any special characters belonging to other
|
||||
elements but which were not deemed meaningful at their positions.
|
||||
-}
|
||||
skipSpecialChar :: Parser (DocMarkup mod a)
|
||||
skipSpecialChar = DocString . pure <$> Parsec.oneOf specialChar
|
||||
|
||||
{- | Plain, regular parser for text. Called as one of the last parsers
|
||||
to ensure that we have already given a chance to more meaningful parsers
|
||||
before capturing their characters.
|
||||
-}
|
||||
string' :: Parser (DocMarkup mod a)
|
||||
string' =
|
||||
DocString
|
||||
-- After the first character, stop for @\(@ or @\[@ math starters. (The
|
||||
-- first character won't start a valid math string because this parser
|
||||
-- should follow math parsers. But this parser is expected to accept at
|
||||
-- least one character from all inputs that don't start with special
|
||||
-- characters, so the first character parser can't have the @"(["@
|
||||
-- restriction.)
|
||||
<$> ((:) <$> rawOrEscChar "" <*> many (rawOrEscChar "(["))
|
||||
where
|
||||
-- \| Parse a single logical character, either raw or escaped. Don't accept
|
||||
-- escaped characters from the argument string.
|
||||
rawOrEscChar :: [Char] -> Parser Char
|
||||
rawOrEscChar restrictedEscapes =
|
||||
Parsec.try $
|
||||
Parsec.noneOf specialChar >>= \case
|
||||
-- Handle backslashes:
|
||||
-- - Fail on forbidden escape characters.
|
||||
-- - Non-forbidden characters: simply unescape, e.g. parse "\b" as 'b',
|
||||
-- - Trailing backslash: treat it as a raw backslash, not an escape
|
||||
-- sequence. (This is the logic that this parser followed when this
|
||||
-- comment was written; it is not necessarily intentional but now I
|
||||
-- don't want to break anything relying on it.)
|
||||
'\\' -> Parsec.noneOf restrictedEscapes <|> Parsec.eof $> '\\'
|
||||
c -> pure c
|
||||
|
||||
{- | Emphasis parser.
|
||||
|
||||
>>> parseString "/Hello world/"
|
||||
DocEmphasis (DocString "Hello world")
|
||||
-}
|
||||
emphasis :: Parser (DocMarkup mod Identifier)
|
||||
emphasis =
|
||||
DocEmphasis . parseAll
|
||||
<$> disallowNewline ("/" *> takeWhile1_ (/= '/') <* "/")
|
||||
|
||||
{- | Bold parser.
|
||||
|
||||
>>> parseString "__Hello world__"
|
||||
DocBold (DocString "Hello world")
|
||||
-}
|
||||
bold :: Parser (DocMarkup mod Identifier)
|
||||
bold = DocBold . parseAll <$> disallowNewline ("__" *> takeUntil "__")
|
||||
|
||||
{- | Monospaced strings.
|
||||
|
||||
>>> parseString "@cruel@"
|
||||
DocMonospaced (DocString "cruel")
|
||||
-}
|
||||
monospace :: Parser (DocMarkup mod Identifier)
|
||||
monospace =
|
||||
DocMonospace . parseAll
|
||||
<$> ("@" *> takeWhile1_ (/= '@') <* "@")
|
||||
|
||||
{- | Text anchors to allow for jumping around the generated documentation.
|
||||
|
||||
>>> parseString "#Hello world#"
|
||||
DocAName "Hello world"
|
||||
-}
|
||||
anchor :: Parser (DocMarkup mod a)
|
||||
anchor =
|
||||
DocAnchor . Text.unpack
|
||||
<$> ("#" *> takeWhile1_ (\x -> x /= '#' && not (Char.isSpace x)) <* "#")
|
||||
|
||||
-- | Parses identifiers with help of 'parseValid'.
|
||||
identifier :: Parser (DocMarkup mod Identifier)
|
||||
identifier = DocIdentifier <$> parseValid
|
||||
|
||||
{- | Module names.
|
||||
|
||||
Note that we allow '#' and '\' to support anchors (old style anchors are of
|
||||
the form "SomeModule\#anchor").
|
||||
-}
|
||||
moduleName :: Parser (DocMarkup mod a)
|
||||
moduleName = DocModule . flip ModuleLink Nothing <$> ("\"" *> moduleNameString <* "\"")
|
||||
|
||||
-- | A module name, optionally with an anchor
|
||||
moduleNameString :: Parser String
|
||||
moduleNameString = moduleId `maybeFollowedBy` anchor_
|
||||
where
|
||||
moduleId = intercalate "." <$> conid `Parsec.sepBy1` "."
|
||||
anchor_ =
|
||||
(++)
|
||||
<$> (Parsec.string "#" <|> Parsec.string "\\#")
|
||||
<*> many (Parsec.satisfy (\c -> c /= '"' && not (Char.isSpace c)))
|
||||
|
||||
maybeFollowedBy pre suf = (\x -> maybe x (x ++)) <$> pre <*> optional suf
|
||||
|
||||
conid =
|
||||
(:)
|
||||
<$> Parsec.satisfy (\c -> Char.isAlpha c && Char.isUpper c)
|
||||
<*> many conChar
|
||||
|
||||
conChar = Parsec.alphaNum <|> Parsec.char '_'
|
||||
|
||||
------------------------
|
||||
-- Markup components
|
||||
------------------------
|
||||
|
||||
{- | List of characters that we use to delimit any special markup.
|
||||
Once we have checked for any of these and tried to parse the
|
||||
relevant markup, we can assume they are used as regular text.
|
||||
-}
|
||||
specialChar :: [Char]
|
||||
specialChar = "_/<@\"&'`#[ "
|
||||
|
||||
------------------------
|
||||
-- Helpers
|
||||
------------------------
|
||||
|
||||
parse' :: Parser a -> Text -> Either String (ParserState, a)
|
||||
parse' parser t =
|
||||
let parser' = (,) <$> parser <*> Parsec.getState
|
||||
in case Parsec.runParser parser' initialParserState "<haddock>" t of
|
||||
Left e -> Left (show e)
|
||||
Right (x, s) -> Right (s, x)
|
||||
|
||||
docConcat :: [DocMarkup mod id] -> DocMarkup mod id
|
||||
docConcat = foldr docAppend DocEmpty
|
||||
where
|
||||
-- Prevent doc append from becoming too nested
|
||||
docAppend (DocDefinitionList ds1) (DocDefinitionList ds2) = DocDefinitionList (ds1 <> ds2)
|
||||
docAppend (DocDefinitionList ds1) (DocAppend (DocDefinitionList ds2) d) = DocAppend (DocDefinitionList (ds1 <> ds2)) d
|
||||
docAppend (DocOrderedList ds1) (DocOrderedList ds2) = DocOrderedList (ds1 <> ds2)
|
||||
docAppend (DocOrderedList ds1) (DocAppend (DocOrderedList ds2) d) = DocAppend (DocOrderedList (ds1 <> ds2)) d
|
||||
docAppend (DocUnorderedList ds1) (DocUnorderedList ds2) = DocUnorderedList (ds1 <> ds2)
|
||||
docAppend (DocUnorderedList ds1) (DocAppend (DocUnorderedList ds2) d) = DocAppend (DocUnorderedList (ds1 <> ds2)) d
|
||||
docAppend DocEmpty d = d
|
||||
docAppend d DocEmpty = d
|
||||
docAppend (DocString s1) (DocString s2) = DocString (s1 <> s2)
|
||||
docAppend (DocAppend d (DocString s1)) (DocString s2) = DocAppend d (DocString (s1 <> s2))
|
||||
docAppend (DocString s1) (DocAppend (DocString s2) d) = DocAppend (DocString (s1 <> s2)) d
|
||||
docAppend d1 d2 = DocAppend d1 d2
|
||||
|
||||
{- | Parse a text paragraph. Actually just a wrapper over 'parseAll' which
|
||||
drops leading whitespace.
|
||||
-}
|
||||
parseText :: Text -> DocMarkup mod Identifier
|
||||
parseText = parseAll . Text.dropWhile Char.isSpace . Text.filter (/= '\r')
|
||||
|
||||
parseAll :: Text -> DocMarkup mod Identifier
|
||||
parseAll = snd . parse myParser
|
||||
where
|
||||
-- docConcat
|
||||
-- <$> many
|
||||
-- ( choice'
|
||||
-- [ monospace
|
||||
-- , anchor
|
||||
-- , identifier
|
||||
-- , moduleName
|
||||
-- , picture
|
||||
-- , mathDisplay
|
||||
-- , mathInline
|
||||
-- , markdownImage
|
||||
-- , markdownLink
|
||||
-- , hyperlink
|
||||
-- , bold
|
||||
-- , emphasis
|
||||
-- , encodedChar
|
||||
-- , string'
|
||||
-- , skipSpecialChar
|
||||
-- ]
|
||||
-- )
|
||||
myParser :: Parser (DocMarkup mod Identifier)
|
||||
myParser =
|
||||
docConcat
|
||||
<$> many
|
||||
( choice'
|
||||
[ monospace
|
||||
, anchor
|
||||
, identifier
|
||||
, moduleName
|
||||
, bold
|
||||
, emphasis
|
||||
, string'
|
||||
, skipSpecialChar
|
||||
]
|
||||
)
|
||||
|
||||
choice' :: [Parser a] -> Parser a
|
||||
choice' [] = empty
|
||||
choice' [p] = p
|
||||
choice' (p : ps) = Parsec.try p <|> choice' ps
|
||||
|
||||
disallowNewline :: Parser Text -> Parser Text
|
||||
disallowNewline = mfilter (Text.all (/= '\n'))
|
||||
Loading…
Add table
Add a link
Reference in a new issue