{-# LANGUAGE OverloadedStrings #-} module Parser ( parse, parseText, ) where import Identifier import ParserMonad import Types import Control.Applicative import Control.Monad import Data.Char qualified as Char import Data.Functor (($>)) import Data.List (intercalate) import Data.Text (Text) import Data.Text qualified as Text import Parser.Util import Text.Parsec qualified as Parsec parse :: Parser a -> Text -> (ParserState, a) parse parser = either err id . parse' (parser <* Parsec.eof) where err = error . ("Haddock.Parser.parse: " ++) -------------------- -- Markup -------------------- {- | Skips a single special character and treats it as a plain string. This is done to skip over any special characters belonging to other elements but which were not deemed meaningful at their positions. -} skipSpecialChar :: Parser (DocMarkup mod a) skipSpecialChar = DocString . pure <$> Parsec.oneOf specialChar {- | Plain, regular parser for text. Called as one of the last parsers to ensure that we have already given a chance to more meaningful parsers before capturing their characters. -} string' :: Parser (DocMarkup mod a) string' = DocString -- After the first character, stop for @\(@ or @\[@ math starters. (The -- first character won't start a valid math string because this parser -- should follow math parsers. But this parser is expected to accept at -- least one character from all inputs that don't start with special -- characters, so the first character parser can't have the @"(["@ -- restriction.) <$> ((:) <$> rawOrEscChar "" <*> many (rawOrEscChar "([")) where -- \| Parse a single logical character, either raw or escaped. Don't accept -- escaped characters from the argument string. rawOrEscChar :: [Char] -> Parser Char rawOrEscChar restrictedEscapes = Parsec.try $ Parsec.noneOf specialChar >>= \case -- Handle backslashes: -- - Fail on forbidden escape characters. -- - Non-forbidden characters: simply unescape, e.g. parse "\b" as 'b', -- - Trailing backslash: treat it as a raw backslash, not an escape -- sequence. (This is the logic that this parser followed when this -- comment was written; it is not necessarily intentional but now I -- don't want to break anything relying on it.) '\\' -> Parsec.noneOf restrictedEscapes <|> Parsec.eof $> '\\' c -> pure c {- | Emphasis parser. >>> parseString "/Hello world/" DocEmphasis (DocString "Hello world") -} emphasis :: Parser (DocMarkup mod Identifier) emphasis = DocEmphasis . parseAll <$> disallowNewline ("/" *> takeWhile1_ (/= '/') <* "/") {- | Bold parser. >>> parseString "__Hello world__" DocBold (DocString "Hello world") -} bold :: Parser (DocMarkup mod Identifier) bold = DocBold . parseAll <$> disallowNewline ("__" *> takeUntil "__") {- | Monospaced strings. >>> parseString "@cruel@" DocMonospaced (DocString "cruel") -} monospace :: Parser (DocMarkup mod Identifier) monospace = DocMonospace . parseAll <$> ("@" *> takeWhile1_ (/= '@') <* "@") {- | Text anchors to allow for jumping around the generated documentation. >>> parseString "#Hello world#" DocAName "Hello world" -} anchor :: Parser (DocMarkup mod a) anchor = DocAnchor . Text.unpack <$> ("#" *> takeWhile1_ (\x -> x /= '#' && not (Char.isSpace x)) <* "#") -- | Parses identifiers with help of 'parseValid'. identifier :: Parser (DocMarkup mod Identifier) identifier = DocIdentifier <$> parseValid {- | Module names. Note that we allow '#' and '\' to support anchors (old style anchors are of the form "SomeModule\#anchor"). -} moduleName :: Parser (DocMarkup mod a) moduleName = DocModule . flip ModuleLink Nothing <$> ("\"" *> moduleNameString <* "\"") -- | A module name, optionally with an anchor moduleNameString :: Parser String moduleNameString = moduleId `maybeFollowedBy` anchor_ where moduleId = intercalate "." <$> conid `Parsec.sepBy1` "." anchor_ = (++) <$> (Parsec.string "#" <|> Parsec.string "\\#") <*> many (Parsec.satisfy (\c -> c /= '"' && not (Char.isSpace c))) maybeFollowedBy pre suf = (\x -> maybe x (x ++)) <$> pre <*> optional suf conid = (:) <$> Parsec.satisfy (\c -> Char.isAlpha c && Char.isUpper c) <*> many conChar conChar = Parsec.alphaNum <|> Parsec.char '_' ------------------------ -- Markup components ------------------------ {- | List of characters that we use to delimit any special markup. Once we have checked for any of these and tried to parse the relevant markup, we can assume they are used as regular text. -} specialChar :: [Char] specialChar = "_/<@\"&'`#[ " ------------------------ -- Helpers ------------------------ parse' :: Parser a -> Text -> Either String (ParserState, a) parse' parser t = let parser' = (,) <$> parser <*> Parsec.getState in case Parsec.runParser parser' initialParserState "" t of Left e -> Left (show e) Right (x, s) -> Right (s, x) docConcat :: [DocMarkup mod id] -> DocMarkup mod id docConcat = foldr docAppend DocEmpty where -- Prevent doc append from becoming too nested docAppend (DocDefinitionList ds1) (DocDefinitionList ds2) = DocDefinitionList (ds1 <> ds2) docAppend (DocDefinitionList ds1) (DocAppend (DocDefinitionList ds2) d) = DocAppend (DocDefinitionList (ds1 <> ds2)) d docAppend (DocOrderedList ds1) (DocOrderedList ds2) = DocOrderedList (ds1 <> ds2) docAppend (DocOrderedList ds1) (DocAppend (DocOrderedList ds2) d) = DocAppend (DocOrderedList (ds1 <> ds2)) d docAppend (DocUnorderedList ds1) (DocUnorderedList ds2) = DocUnorderedList (ds1 <> ds2) docAppend (DocUnorderedList ds1) (DocAppend (DocUnorderedList ds2) d) = DocAppend (DocUnorderedList (ds1 <> ds2)) d docAppend DocEmpty d = d docAppend d DocEmpty = d docAppend (DocString s1) (DocString s2) = DocString (s1 <> s2) docAppend (DocAppend d (DocString s1)) (DocString s2) = DocAppend d (DocString (s1 <> s2)) docAppend (DocString s1) (DocAppend (DocString s2) d) = DocAppend (DocString (s1 <> s2)) d docAppend d1 d2 = DocAppend d1 d2 {- | Parse a text paragraph. Actually just a wrapper over 'parseAll' which drops leading whitespace. -} parseText :: Text -> DocMarkup mod Identifier parseText = parseAll . Text.dropWhile Char.isSpace . Text.filter (/= '\r') parseAll :: Text -> DocMarkup mod Identifier parseAll = snd . parse myParser where -- docConcat -- <$> many -- ( choice' -- [ monospace -- , anchor -- , identifier -- , moduleName -- , picture -- , mathDisplay -- , mathInline -- , markdownImage -- , markdownLink -- , hyperlink -- , bold -- , emphasis -- , encodedChar -- , string' -- , skipSpecialChar -- ] -- ) myParser :: Parser (DocMarkup mod Identifier) myParser = docConcat <$> many ( choice' [ monospace , anchor , identifier , moduleName , bold , emphasis , string' , skipSpecialChar ] ) choice' :: [Parser a] -> Parser a choice' [] = empty choice' [p] = p choice' (p : ps) = Parsec.try p <|> choice' ps disallowNewline :: Parser Text -> Parser Text disallowNewline = mfilter (Text.all (/= '\n'))