Init

2025-09-21 08:19:01 +02:00 · 2025-09-21 08:19:01 +02:00 · c9f61c4e06
commit c9f61c4e06
16 changed files with 1366 additions and 0 deletions
--- a/src/Identifier.hs
+++ b/src/Identifier.hs
@ -0,0 +1,157 @@
+module Identifier (
+  Identifier (..),
+  Namespace (..),
+  parseValid,
+) where
+
+import Data.Text qualified as Text
+import Text.Parsec qualified as Parsec
+
+import Control.Monad (guard)
+import Data.Char (isAlpha, isAlphaNum)
+import Data.Functor (($>))
+import Data.Maybe (listToMaybe, maybeToList)
+import Data.Text (Text)
+import ParserMonad
+import Text.Parsec (State (..))
+import Text.Parsec.Pos (updatePosChar)
+import Text.Read.Lex (isSymbolChar)
+
+-- | Identifier string surrounded with namespace, opening, and closing quotes/backticks.
+data Identifier = Identifier !Namespace !Char String !Char
+  deriving (Show, Eq)
+
+-- | The namespace qualification for an identifier.
+data Namespace = Value | Type | None deriving (Eq, Ord, Enum, Show)
+
+parseValid :: Parser Identifier
+parseValid = do
+  state@State{stateInput = input, statePos = pos} <- Parsec.getParserState
+
+  case takeIdentifier input of
+    Nothing -> Parsec.parserFail "parseValid: Failed to match a valid identifier"
+    Just (namespace, op, ident, cl, input') ->
+      let posOp = updatePosChar pos op
+          posIdent = Text.foldl updatePosChar posOp ident
+          posCl = updatePosChar posIdent cl
+          newState = state{stateInput = input', statePos = posCl}
+       in Parsec.setParserState newState $> Identifier namespace op (Text.unpack ident) cl
+
+{- | Try to parse a delimited identifier off the front of the given input.
+
+This tries to match as many valid Haskell identifiers/operators as possible,
+to the point of sometimes accepting invalid things (ex: keywords). Some
+considerations:
+
+  - operators and identifiers can have module qualifications
+  - operators can be wrapped in parens (for prefix)
+  - identifiers can be wrapped in backticks (for infix)
+  - delimiters are backticks or regular ticks
+  - since regular ticks are also valid in identifiers, we opt for the
+    longest successful parse
+
+This function should make /O(1)/ allocations
+-}
+takeIdentifier :: Text -> Maybe (Namespace, Char, Text, Char, Text)
+takeIdentifier input = listToMaybe $ do
+  -- Optional namespace
+  let (namespace, input') = case Text.uncons input of
+        Just ('v', i) -> (Value, i)
+        Just ('t', i) -> (Type, i)
+        _ -> (None, input)
+  -- Opening tick
+  (op, input'') <- maybeToList (Text.uncons input')
+  guard (op == '\'' || op == '`')
+
+  -- Identifier/operator
+  (ident, input''') <- wrapped input''
+
+  -- Closing tick
+  (cl, input'''') <- maybeToList (Text.uncons input''')
+  guard (cl == '\'' || cl == '`')
+
+  pure (namespace, op, ident, cl, input'''')
+ where
+  -- \| Parse out a wrapped, possibly qualified, operator or identifier
+  wrapped t = do
+    (c, t') <- maybeToList (Text.uncons t)
+    -- Tuples
+    case c of
+      '('
+        | Just (c', _) <- Text.uncons t'
+        , c' == ',' || c' == ')' ->
+            do
+              let (commas, t'') = Text.span (== ',') t'
+              (')', t''') <- maybeToList (Text.uncons t'')
+              pure (Text.take (Text.length commas + 2) t, t''')
+
+      -- Parenthesized
+      '(' -> do
+        (n, t'') <- general False 0 [] t'
+        (')', t''') <- maybeToList (Text.uncons t'')
+        pure (Text.take (n + 2) t, t''')
+
+      -- Backticked
+      '`' -> do
+        (n, t'') <- general False 0 [] t'
+        ('`', t''') <- maybeToList (Text.uncons t'')
+        pure (Text.take (n + 2) t, t''')
+
+      -- Unadorned
+      _ -> do
+        (n, t'') <- general False 0 [] t
+        pure (Text.take n t, t'')
+
+  -- \| Parse out a possibly qualified operator or identifier
+  general ::
+    Bool ->
+    -- \^ refuse inputs starting with operators
+    Int ->
+    -- \^ total characters \"consumed\" so far
+    [(Int, Text)] ->
+    -- \^ accumulated results
+    Text ->
+    -- \^ current input
+    [(Int, Text)]
+  -- \^ total characters parsed & what remains
+  general !identOnly !i acc t
+    -- Starts with an identifier (either just an identifier, or a module qual)
+    | Just (n, rest) <- identLike t =
+        if Text.null rest
+          then acc
+          else case Text.head rest of
+            '`' -> (n + i, rest) : acc
+            ')' -> (n + i, rest) : acc
+            '.' -> general False (n + i + 1) acc (Text.tail rest)
+            '\'' ->
+              let (m, rest') = quotes rest
+               in general True (n + m + 1 + i) ((n + m + i, rest') : acc) (Text.tail rest')
+            _ -> acc
+    -- An operator
+    | Just (n, rest) <- optr t
+    , not identOnly =
+        (n + i, rest) : acc
+    -- Anything else
+    | otherwise =
+        acc
+
+  -- \| Parse an identifier off the front of the input
+  identLike t
+    | Text.null t = Nothing
+    | isAlpha (Text.head t) || '_' == Text.head t =
+        let !(idt, rest) = Text.span (\c -> isAlphaNum c || c == '_') t
+            !(octos, rest') = Text.span (== '#') rest
+         in Just (Text.length idt + Text.length octos, rest')
+    | otherwise = Nothing
+
+  -- \| Parse all but the last quote off the front of the input
+  -- PRECONDITION: T.head t `elem` ['\'', '`']
+  quotes :: Text -> (Int, Text)
+  quotes t =
+    let !n = Text.length (Text.takeWhile (`elem` ['\'', '`']) t) - 1
+     in (n, Text.drop n t)
+
+  -- \| Parse an operator off the front of the input
+  optr t =
+    let !(op, rest) = Text.span isSymbolChar t
+     in if Text.null op then Nothing else Just (Text.length op, rest)
--- a/src/Lexer.hs
+++ b/src/Lexer.hs
@ -0,0 +1,190 @@
+{-# LANGUAGE OverloadedStrings #-}
+
+module Lexer (
+  Token (..),
+  lexer,
+  emphasis,
+) where
+
+import Control.Monad (mfilter)
+import Data.Char (isAlphaNum, isPrint)
+import Data.Functor (($>))
+import Data.Text (Text)
+import Data.Text qualified as Text
+import GHC.Stack (HasCallStack)
+import ParserMonad (Parser, initialParserState)
+import Text.Parsec
+import Text.Parsec qualified as Parsec
+import Text.Parsec.Pos (updatePosChar)
+
+type LocatedToken = (SourcePos, Token)
+
+type Lexer = Parser [LocatedToken]
+
+data Token
+  = Token Text
+  | Anchor
+  | AngleOpen
+  | AngleClose
+  | BoldOpen
+  | BoldClose
+  | BracketOpen
+  | BracketClose
+  | EmphasisOpen
+  | EmphasisClose
+  | MonospaceOpen
+  | MonospaceClose
+  | Newline
+  | ParenOpen
+  | ParenClose
+  | QuoteOpen
+  | QuoteClose
+  | Space
+  | EOF
+  deriving (Eq, Show)
+
+lexer :: String -> Either ParseError [LocatedToken]
+lexer = Parsec.runParser lexText initialParserState "input" . Text.pack
+
+lexText :: (HasCallStack) => Parser [LocatedToken]
+lexText = go
+ where
+  go = do
+    Parsec.optionMaybe Parsec.eof >>= \case
+      Just _ -> pure []
+      Nothing -> do
+        toks <-
+          choice
+            [ newlineToken
+            , spaceToken
+            , textElement
+            , identifier
+            , other
+            ]
+        rest <- go
+        pure (toks <> rest)
+
+match :: Parser a -> Parser (Text, a)
+match p = do
+  input <- getInput
+  result <- p
+  input' <- getInput
+  let !consumed = Text.take (Text.length input - Text.length input') input
+  pure (consumed, result)
+
+-- Tokens
+
+textElement :: Parser [LocatedToken]
+textElement =
+  choice $
+    Parsec.try
+      <$> [ emphasis
+          , bold
+          , monospace
+          , parens
+          , brackets
+          , angles
+          ]
+
+delimited :: String -> String -> Token -> Token -> Parser [LocatedToken]
+delimited c1 c2 ot ct = do
+  pos <- getPosition
+  (_, content) <- match $ between op cl any'
+
+  innerToks <- case lexer $ Text.unpack content of
+    Left _ -> do
+      pos' <- getPosition
+      pure $ [(pos', Token content)]
+    Right toks -> pure toks
+
+  let openTok :: LocatedToken = (pos, ot)
+      closeTok :: LocatedToken = (pos, ct)
+
+  pure $ openTok : innerToks <> [closeTok]
+ where
+  op = string c1
+  cl = string c2
+  any' = Text.pack <$> manyTill anyChar (lookAhead cl)
+
+delimited' :: String -> Token -> Token -> Parser [LocatedToken]
+delimited' s t1 t2 = delimited s s t1 t2
+
+emphasis :: Lexer
+emphasis = delimited' "/" EmphasisOpen EmphasisClose
+
+bold :: Lexer
+bold = delimited' "__" BoldOpen BoldClose
+
+monospace :: Lexer
+monospace = delimited' "@" MonospaceOpen MonospaceClose
+
+parens :: Parser [LocatedToken]
+parens = delimited "(" ")" ParenOpen ParenClose
+
+brackets :: Lexer
+brackets = delimited "[" "]" ParenOpen ParenClose
+
+angles :: Parser [LocatedToken]
+angles = delimited "<" ">" AngleOpen AngleClose
+
+other :: Lexer
+other = do
+  pos <- getPosition
+  c <- takeWhile1_ isPrint
+  pure . pure $ (pos, Token c)
+
+spaceToken :: Lexer
+spaceToken = do
+  pos <- getPosition
+  _ <- many1 (char ' ')
+  pure . pure $ (pos, Space)
+
+newlineToken :: Lexer
+newlineToken = do
+  pos <- getPosition
+  _ <- newline
+  pure . pure $ (pos, Newline)
+
+identifier :: Lexer
+identifier = do
+  pos <- getPosition
+  txt <- takeWhile1_ isAlphaNum
+  pure . pure $ (pos, Token txt)
+
+-------
+-- Helpers
+-------
+
+-- | Like `takeWhile`, but unconditionally take escaped characters.
+takeWhile_ :: (Char -> Bool) -> Parser Text
+takeWhile_ p = scan p_ False
+ where
+  p_ escaped c
+    | escaped = Just False
+    | not $ p c = Nothing
+    | otherwise = Just (c == '\\')
+
+-- | Like 'takeWhile1', but unconditionally take escaped characters.
+takeWhile1_ :: (Char -> Bool) -> Parser Text
+takeWhile1_ = mfilter (not . Text.null) . takeWhile_
+
+{- | Scan the input text, accumulating characters as long as the scanning
+function returns true.
+-}
+scan ::
+  -- | scan function
+  (state -> Char -> Maybe state) ->
+  -- | initial state
+  state ->
+  Parser Text
+scan f initState = do
+  parserState@State{stateInput = input, statePos = pos} <- Parsec.getParserState
+  (remaining, finalPos, ct) <- go input initState pos 0
+  let newState = parserState{stateInput = remaining, statePos = finalPos}
+  Parsec.setParserState newState $> Text.take ct input
+ where
+  go !input' !st !posAccum !count' = case Text.uncons input' of
+    Nothing -> pure (input', posAccum, count')
+    Just (char', input'') -> case f st char' of
+      Nothing -> pure (input', posAccum, count')
+      Just st' -> go input'' st' (updatePosChar posAccum char') (count' + 1)
--- a/src/Parser.hs
+++ b/src/Parser.hs
@ -0,0 +1,228 @@
+{-# LANGUAGE OverloadedStrings #-}
+
+module Parser (
+  parse,
+  parseText,
+) where
+
+import Identifier
+import ParserMonad
+import Types
+
+import Control.Applicative
+import Control.Monad
+import Data.Char qualified as Char
+import Data.Functor (($>))
+import Data.List (intercalate)
+import Data.Text (Text)
+import Data.Text qualified as Text
+import Parser.Util
+import Text.Parsec qualified as Parsec
+
+parse :: Parser a -> Text -> (ParserState, a)
+parse parser = either err id . parse' (parser <* Parsec.eof)
+ where
+  err = error . ("Haddock.Parser.parse: " ++)
+
+--------------------
+-- Markup
+--------------------
+
+{- | Skips a single special character and treats it as a plain string.
+This is done to skip over any special characters belonging to other
+elements but which were not deemed meaningful at their positions.
+-}
+skipSpecialChar :: Parser (DocMarkup mod a)
+skipSpecialChar = DocString . pure <$> Parsec.oneOf specialChar
+
+{- | Plain, regular parser for text. Called as one of the last parsers
+to ensure that we have already given a chance to more meaningful parsers
+before capturing their characters.
+-}
+string' :: Parser (DocMarkup mod a)
+string' =
+  DocString
+    -- After the first character, stop for @\(@ or @\[@ math starters. (The
+    -- first character won't start a valid math string because this parser
+    -- should follow math parsers. But this parser is expected to accept at
+    -- least one character from all inputs that don't start with special
+    -- characters, so the first character parser can't have the @"(["@
+    -- restriction.)
+    <$> ((:) <$> rawOrEscChar "" <*> many (rawOrEscChar "(["))
+ where
+  -- \| Parse a single logical character, either raw or escaped. Don't accept
+  -- escaped characters from the argument string.
+  rawOrEscChar :: [Char] -> Parser Char
+  rawOrEscChar restrictedEscapes =
+    Parsec.try $
+      Parsec.noneOf specialChar >>= \case
+        -- Handle backslashes:
+        --   - Fail on forbidden escape characters.
+        --   - Non-forbidden characters: simply unescape, e.g. parse "\b" as 'b',
+        --   - Trailing backslash: treat it as a raw backslash, not an escape
+        --     sequence. (This is the logic that this parser followed when this
+        --     comment was written; it is not necessarily intentional but now I
+        --     don't want to break anything relying on it.)
+        '\\' -> Parsec.noneOf restrictedEscapes <|> Parsec.eof $> '\\'
+        c -> pure c
+
+{- | Emphasis parser.
+
+>>> parseString "/Hello world/"
+DocEmphasis (DocString "Hello world")
+-}
+emphasis :: Parser (DocMarkup mod Identifier)
+emphasis =
+  DocEmphasis . parseAll
+    <$> disallowNewline ("/" *> takeWhile1_ (/= '/') <* "/")
+
+{- | Bold parser.
+
+>>> parseString "__Hello world__"
+DocBold (DocString "Hello world")
+-}
+bold :: Parser (DocMarkup mod Identifier)
+bold = DocBold . parseAll <$> disallowNewline ("__" *> takeUntil "__")
+
+{- | Monospaced strings.
+
+>>> parseString "@cruel@"
+DocMonospaced (DocString "cruel")
+-}
+monospace :: Parser (DocMarkup mod Identifier)
+monospace =
+  DocMonospace . parseAll
+    <$> ("@" *> takeWhile1_ (/= '@') <* "@")
+
+{- | Text anchors to allow for jumping around the generated documentation.
+
+>>> parseString "#Hello world#"
+DocAName "Hello world"
+-}
+anchor :: Parser (DocMarkup mod a)
+anchor =
+  DocAnchor . Text.unpack
+    <$> ("#" *> takeWhile1_ (\x -> x /= '#' && not (Char.isSpace x)) <* "#")
+
+-- | Parses identifiers with help of 'parseValid'.
+identifier :: Parser (DocMarkup mod Identifier)
+identifier = DocIdentifier <$> parseValid
+
+{- | Module names.
+
+Note that we allow '#' and '\' to support anchors (old style anchors are of
+the form "SomeModule\#anchor").
+-}
+moduleName :: Parser (DocMarkup mod a)
+moduleName = DocModule . flip ModuleLink Nothing <$> ("\"" *> moduleNameString <* "\"")
+
+-- | A module name, optionally with an anchor
+moduleNameString :: Parser String
+moduleNameString = moduleId `maybeFollowedBy` anchor_
+ where
+  moduleId = intercalate "." <$> conid `Parsec.sepBy1` "."
+  anchor_ =
+    (++)
+      <$> (Parsec.string "#" <|> Parsec.string "\\#")
+      <*> many (Parsec.satisfy (\c -> c /= '"' && not (Char.isSpace c)))
+
+  maybeFollowedBy pre suf = (\x -> maybe x (x ++)) <$> pre <*> optional suf
+
+  conid =
+    (:)
+      <$> Parsec.satisfy (\c -> Char.isAlpha c && Char.isUpper c)
+      <*> many conChar
+
+  conChar = Parsec.alphaNum <|> Parsec.char '_'
+
+------------------------
+-- Markup components
+------------------------
+
+{- | List of characters that we use to delimit any special markup.
+Once we have checked for any of these and tried to parse the
+relevant markup, we can assume they are used as regular text.
+-}
+specialChar :: [Char]
+specialChar = "_/<@\"&'`#[ "
+
+------------------------
+--  Helpers
+------------------------
+
+parse' :: Parser a -> Text -> Either String (ParserState, a)
+parse' parser t =
+  let parser' = (,) <$> parser <*> Parsec.getState
+   in case Parsec.runParser parser' initialParserState "<haddock>" t of
+        Left e -> Left (show e)
+        Right (x, s) -> Right (s, x)
+
+docConcat :: [DocMarkup mod id] -> DocMarkup mod id
+docConcat = foldr docAppend DocEmpty
+ where
+  -- Prevent doc append from becoming too nested
+  docAppend (DocDefinitionList ds1) (DocDefinitionList ds2) = DocDefinitionList (ds1 <> ds2)
+  docAppend (DocDefinitionList ds1) (DocAppend (DocDefinitionList ds2) d) = DocAppend (DocDefinitionList (ds1 <> ds2)) d
+  docAppend (DocOrderedList ds1) (DocOrderedList ds2) = DocOrderedList (ds1 <> ds2)
+  docAppend (DocOrderedList ds1) (DocAppend (DocOrderedList ds2) d) = DocAppend (DocOrderedList (ds1 <> ds2)) d
+  docAppend (DocUnorderedList ds1) (DocUnorderedList ds2) = DocUnorderedList (ds1 <> ds2)
+  docAppend (DocUnorderedList ds1) (DocAppend (DocUnorderedList ds2) d) = DocAppend (DocUnorderedList (ds1 <> ds2)) d
+  docAppend DocEmpty d = d
+  docAppend d DocEmpty = d
+  docAppend (DocString s1) (DocString s2) = DocString (s1 <> s2)
+  docAppend (DocAppend d (DocString s1)) (DocString s2) = DocAppend d (DocString (s1 <> s2))
+  docAppend (DocString s1) (DocAppend (DocString s2) d) = DocAppend (DocString (s1 <> s2)) d
+  docAppend d1 d2 = DocAppend d1 d2
+
+{- | Parse a text paragraph. Actually just a wrapper over 'parseAll' which
+drops leading whitespace.
+-}
+parseText :: Text -> DocMarkup mod Identifier
+parseText = parseAll . Text.dropWhile Char.isSpace . Text.filter (/= '\r')
+
+parseAll :: Text -> DocMarkup mod Identifier
+parseAll = snd . parse myParser
+ where
+  -- docConcat
+  --   <$> many
+  --     ( choice'
+  --         [ monospace
+  --         , anchor
+  --         , identifier
+  --         , moduleName
+  --         , picture
+  --         , mathDisplay
+  --         , mathInline
+  --         , markdownImage
+  --         , markdownLink
+  --         , hyperlink
+  --         , bold
+  --         , emphasis
+  --         , encodedChar
+  --         , string'
+  --         , skipSpecialChar
+  --         ]
+  --     )
+  myParser :: Parser (DocMarkup mod Identifier)
+  myParser =
+    docConcat
+      <$> many
+        ( choice'
+            [ monospace
+            , anchor
+            , identifier
+            , moduleName
+            , bold
+            , emphasis
+            , string'
+            , skipSpecialChar
+            ]
+        )
+
+choice' :: [Parser a] -> Parser a
+choice' [] = empty
+choice' [p] = p
+choice' (p : ps) = Parsec.try p <|> choice' ps
+
+disallowNewline :: Parser Text -> Parser Text
+disallowNewline = mfilter (Text.all (/= '\n'))
--- a/src/Parser/Util.hs
+++ b/src/Parser/Util.hs
@ -0,0 +1,65 @@
+module Parser.Util where
+
+import Control.Monad (mfilter)
+import Data.Functor (($>))
+import Data.Text (Text)
+import Data.Text qualified as Text
+import ParserMonad (Parser)
+import Text.Parsec (State (..))
+import Text.Parsec qualified as Parsec
+import Text.Parsec.Pos (updatePosChar)
+
+{- | Consume characters from the input up to and including the given pattern.
+Return everything consumed except for the end pattern itself.
+-}
+takeUntil :: Text -> Parser Text
+takeUntil end_ = Text.dropEnd (Text.length end_) <$> requireEnd (scan p (False, end)) >>= gotSome
+ where
+  end = Text.unpack end_
+
+  p :: (Bool, String) -> Char -> Maybe (Bool, String)
+  p acc c = case acc of
+    (True, _) -> Just (False, end)
+    (_, []) -> Nothing
+    (_, x : xs) | x == c -> Just (False, xs)
+    _ -> Just (c == '\\', end)
+
+  requireEnd = mfilter (Text.isSuffixOf end_)
+
+  gotSome xs
+    | Text.null xs = fail "didn't get any content"
+    | otherwise = return xs
+
+-- | Like `takeWhile`, but unconditionally take escaped characters.
+takeWhile_ :: (Char -> Bool) -> Parser Text
+takeWhile_ p = scan p_ False
+ where
+  p_ escaped c
+    | escaped = Just False
+    | not $ p c = Nothing
+    | otherwise = Just (c == '\\')
+
+-- | Like 'takeWhile1', but unconditionally take escaped characters.
+takeWhile1_ :: (Char -> Bool) -> Parser Text
+takeWhile1_ = mfilter (not . Text.null) . takeWhile_
+
+{- | Scan the input text, accumulating characters as long as the scanning
+function returns true.
+-}
+scan ::
+  -- | scan function
+  (state -> Char -> Maybe state) ->
+  -- | initial state
+  state ->
+  Parser Text
+scan f initState = do
+  parserState@State{stateInput = input, statePos = pos} <- Parsec.getParserState
+  (remaining, finalPos, ct) <- go input initState pos 0
+  let newState = parserState{stateInput = remaining, statePos = finalPos}
+  Parsec.setParserState newState $> Text.take ct input
+ where
+  go !input' !st !posAccum !count' = case Text.uncons input' of
+    Nothing -> pure (input', posAccum, count')
+    Just (char', input'') -> case f st char' of
+      Nothing -> pure (input', posAccum, count')
+      Just st' -> go input'' st' (updatePosChar posAccum char') (count' + 1)
--- a/src/ParserMonad.hs
+++ b/src/ParserMonad.hs
@ -0,0 +1,26 @@
+module ParserMonad where
+
+import Data.String
+import Data.Text (Text)
+import Data.Text qualified as Text
+import Text.Parsec (Parsec)
+import Text.Parsec qualified as Parsec
+
+import Types
+
+type Parser = Parsec Text ParserState
+
+instance (a ~ Text) => IsString (Parser a) where
+  fromString = fmap Text.pack . Parsec.string
+
+{- | The only bit of information we really care about trudging along with us
+through parsing is the version attached to a @\@since@ annotation - if
+the doc even contained one.
+-}
+newtype ParserState = ParserState
+  { since :: Maybe Since
+  }
+  deriving (Eq, Show)
+
+initialParserState :: ParserState
+initialParserState = ParserState Nothing
--- a/src/Types.hs
+++ b/src/Types.hs
@ -0,0 +1,196 @@
+module Types (
+  DocMarkup (..),
+  Document (..),
+  Meta (..),
+  ModuleLink (..),
+  Package,
+  Since (..),
+  Version,
+)
+where
+
+newtype Document = Document
+  { meta :: Meta
+  }
+  deriving (Eq, Show)
+
+newtype Meta = Meta
+  { since :: Maybe Since
+  }
+  deriving (Eq, Show)
+
+data Since = Since
+  { package :: Maybe Package
+  -- ^ optional package qualification
+  , version :: Version
+  }
+  deriving (Eq, Show)
+
+-- Could have a better type?
+type Version = [Int]
+type Package = String
+
+data DocMarkup mod id
+  = DocEmpty
+  | -- | This is not represented in the markup language, this is for internal use
+    DocAppend (DocMarkup mod id) (DocMarkup mod id)
+  | -- | Any text that doesn't match any rules is a bare string
+    DocString String
+  | -- | Paragraphs are demarcated by blank lines
+    DocParagraph (DocMarkup mod id)
+  | -- | A haskell identifier
+    DocIdentifier id
+  | -- | A qualified identifier that couldn't be resolved.
+    DocIdentifierUnchecked
+  | -- | A link to a module, might include a label
+    DocModule (ModuleLink (DocMarkup mod id))
+  | -- | Emphasis /italics/
+    DocEmphasis (DocMarkup mod id)
+  | -- | Monospaced @source code@
+    DocMonospace (DocMarkup mod id)
+  | -- | Bold __bold text__
+    DocBold (DocMarkup mod id)
+  | {- | Unordered lists
+      * this
+      or
+      - this
+    -}
+    DocUnorderedList [DocMarkup mod id]
+  | {- | Ordered lists
+      1. this
+      or
+      (1) this
+    -}
+    DocOrderedList [(Int, DocMarkup mod id)]
+  | {- | Definition lists
+      [term] a term
+      [another term] another definition
+    -}
+    DocDefinitionList [(DocMarkup mod id, DocMarkup mod id)]
+  | {- | Code blocks
+      @
+      a code block in here
+      with multiple lines
+      @
+
+      Or with bird tracks:
+      > some code
+      > goes here
+    -}
+    DocCodeBlock (DocMarkup mod id)
+  | {- | Hyperlinks
+      __marked__:
+      <http://example.com>
+      <http://example.com label text>
+      __Auto-detected URLs__:
+      http://example.com
+      https://example.com
+      ftp://example.com
+      __Markdown style__
+      [link text](http://example.com)
+      [link text]("Module.Name")
+    -}
+    DocHyperlink (Hyperlink (DocMarkup mod id))
+  | {- | Pictures
+        <<image.png>>
+        <<image.png title text>>
+
+        __Markdown Images__
+
+        ![alt text](image.png)
+    -}
+    DocPicture Picture
+  | {- | Inline math expressions
+      \(mathematical expression\)
+    -}
+    DocMathInline String
+  | {- | Math multiline display
+    \[
+      mathematical expression
+      in multiple lines
+    \]
+    -}
+    DocMathDisplay String
+  | {- | Anchors, no spaces allowed
+      #anchor-name#
+    -}
+    DocAnchor String
+  | {- | Property descriptions
+      prop> property description
+    -}
+    DocProperty String
+  | {- | Examples
+      >>> expression
+      result line 1
+      result line 2
+    -}
+    DocExamples [Example]
+  | -- | Header
+    DocHeader (Header (DocMarkup mod id))
+  | -- Table
+    DocTable (Table (DocMarkup mod id))
+  deriving (Eq, Show)
+
+instance Semigroup (DocMarkup mod id) where
+  (<>) = DocAppend
+
+instance Monoid (DocMarkup mod id) where
+  mempty = DocEmpty
+  mconcat = foldr (<>) mempty
+
+data ModuleLink id = ModuleLink
+  { name :: String
+  , label :: Maybe id
+  }
+  deriving (Eq, Show)
+
+data Picture = Picture
+  { uri :: String
+  , title :: Maybe String
+  }
+  deriving (Eq, Show)
+
+data Hyperlink id = Hyperlink
+  { url :: String
+  , label :: Maybe id
+  }
+  deriving (Eq, Show, Functor, Foldable, Traversable)
+
+data TableCell id = TableCell
+  { col :: Int
+  , row :: Int
+  , content :: id
+  }
+  deriving (Eq, Show, Functor, Foldable, Traversable)
+
+newtype TableRow id = TableRow
+  { rows :: [TableCell id]
+  }
+  deriving (Eq, Show, Functor, Foldable, Traversable)
+
+data Table id = Table
+  { headerRows :: [TableRow id]
+  , bodyRows :: [TableRow id]
+  }
+  deriving (Eq, Show, Functor, Foldable, Traversable)
+
+data Example = Example
+  { exampleExpression :: String
+  , exampleResult :: [String]
+  }
+  deriving (Eq, Show)
+
+data Header id = Header
+  { level :: HeaderLevel
+  , title :: id
+  }
+  deriving (Eq, Show)
+
+data HeaderLevel
+  = H1
+  | H2
+  | H3
+  | H4
+  | H5
+  | H6
+  deriving (Eq, Show, Bounded, Enum)