forked from elland/haddock2
Init
This commit is contained in: commit c9f61c4e06
16 changed files with 1366 additions and 0 deletions

1 .gitignore vendored Normal file
@@ -0,0 +1 @@
dist-newstyle

5 CHANGELOG.md Normal file
@@ -0,0 +1,5 @@
# Revision history for haddock2

## 0.1.0.0 -- YYYY-mm-dd

* First version. Released on an unsuspecting world.

88 Grammar.ebnf Normal file
@@ -0,0 +1,88 @@
document ::= paragraph*

paragraph ::= ( text_paragraph | code_block | header | list | table ) newline?

text_paragraph ::= text_element+

text_element ::= emphasis | bold | monospace | link | anchor | identifier |
                 inline_math | image | plain_text | escaped_char

emphasis ::= '/' text_no_newline '/'
bold ::= '__' text_no_newline '__'
monospace ::= '@' text_content '@'

link ::= module_link | hyperlink | markdown_link
module_link ::= '"' module_name ( '#' anchor_name )? '"'
hyperlink ::= '<' url ( ' ' link_text )? '>'
markdown_link ::= '[' link_text '](' ( url | module_link ) ')'

anchor ::= '#' anchor_name '#'
identifier ::= "'" haskell_id "'"

inline_math ::= '\(' math_expr '\)'
image ::= '<<' image_path ( ' ' image_title )? '>>' |
          '![' alt_text '](' image_path ')'

code_block ::= at_block | bird_tracks | example_block | property_block
at_block ::= '@' newline code_content newline '@'
bird_tracks ::= ( '>' ' '? code_line newline )+
example_block ::= ( '>>>' ' ' expression newline result_line* )+
property_block ::= 'prop>' ' ' property_desc newline

header ::= header_marker ' ' header_text newline
header_marker ::= '=' | '==' | '===' | '====' | '=====' | '======'

list ::= unordered_list | ordered_list | definition_list
unordered_list ::= ( list_marker ' ' list_content )+
list_marker ::= '*' | '-'
ordered_list ::= ( number_marker ' ' list_content )+
number_marker ::= digit+ '.' | '(' digit+ ')'
definition_list ::= ( '[' term ']' ':'? ' ' definition_content )+

table ::= table_border header_row header_sep data_row* table_border
table_border ::= '+' ( '-' | '+' )* newline
header_row ::= '|' ( table_cell '|' )* newline
header_sep ::= '+' ( '=' | '+' )* newline
data_row ::= '|' ( table_cell '|' )* newline

plain_text ::= text_char+
text_char ::= letter | digit | ' ' | punctuation
text_no_newline ::= ( letter | digit | ' ' | safe_punctuation )+
text_content ::= ( letter | digit | ' ' | newline | punctuation )*
code_content ::= code_char*
code_char ::= letter | digit | ' ' | newline | punctuation
code_line ::= ( letter | digit | ' ' | punctuation )*
list_content ::= text_element* ( newline ' ' text_element* )?
table_cell ::= ' ' table_char* ' '
table_char ::= letter | digit | ' ' | safe_punctuation
header_text ::= ( letter | digit | ' ' | punctuation )+
link_text ::= link_char+
link_char ::= letter | digit | ' ' | safe_punctuation
alt_text ::= alt_char+
alt_char ::= letter | digit | ' ' | safe_punctuation
image_path ::= ( letter | digit | '.' | '/' | '-' | '_' )+
image_title ::= image_title_char+
image_title_char ::= letter | digit | ' ' | safe_punctuation
anchor_name ::= letter ( letter | digit | '-' )*
haskell_id ::= letter ( letter | digit | '_' | "'" )*
module_name ::= haskell_id ( '.' haskell_id )*
url ::= ( 'http://' | 'https://' | 'ftp://' ) url_char+
url_char ::= letter | digit | '/' | '.' | ':' | '-' | '_' | '?' | '&' | '='
math_expr ::= math_char+
math_char ::= letter | digit | ' ' | math_punctuation
property_desc ::= ( letter | digit | ' ' | punctuation )+
expression ::= ( letter | digit | ' ' | punctuation )+
result_line ::= result_char* newline
result_char ::= letter | digit | ' ' | safe_punctuation
term ::= term_char+
term_char ::= letter | digit | ' ' | safe_punctuation
definition_content ::= text_element+
escaped_char ::= '\' special_char

special_char ::= '/' | '*' | '@' | "'" | '"' | '#' | '<' | '>' | '[' | ']' | '(' | ')' | '|' | '=' | '-' | '+'
safe_punctuation ::= '!' | '$' | '%' | '^' | '&' | '(' | ')' | '_' | '+' | '{' | '}' | ';' | ':' | ',' | '.' | '`' | '~'
math_punctuation ::= '+' | '-' | '*' | '/' | '^' | '_' | '=' | '(' | ')' | '{' | '}' | '[' | ']'
punctuation ::= '!' | '@' | '#' | '$' | '%' | '^' | '&' | '*' | '(' | ')' | '-' | '_' | '+' | '=' | '{' | '}' | '[' | ']' | '|' | '\' | ':' | ';' | '"' | "'" | '<' | '>' | '?' | ',' | '.' | '/' | '`' | '~'
letter ::= 'a' | 'b' | 'c' | 'd' | 'e' | 'f' | 'g' | 'h' | 'i' | 'j' | 'k' | 'l' | 'm' | 'n' | 'o' | 'p' | 'q' | 'r' | 's' | 't' | 'u' | 'v' | 'w' | 'x' | 'y' | 'z' | 'A' | 'B' | 'C' | 'D' | 'E' | 'F' | 'G' | 'H' | 'I' | 'J' | 'K' | 'L' | 'M' | 'N' | 'O' | 'P' | 'Q' | 'R' | 'S' | 'T' | 'U' | 'V' | 'W' | 'X' | 'Y' | 'Z'
digit ::= '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9'
newline ::= #xA | #xD #xA | #xD
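
The terminals above line up with the constructors defined later in src/Types.hs. As a hedged illustration (not part of this commit), the `header_marker` production could map onto the library's `HeaderLevel` type with the same parsec setup the other modules use; the name `headerMarker` is made up here, and it assumes `HeaderLevel` were added to the export list of Types (the initial commit does not export it).

```haskell
import ParserMonad (Parser)
import Text.Parsec qualified as Parsec
import Types (HeaderLevel)

-- Sketch only: header_marker ::= '=' | '==' | ... | '======'
headerMarker :: Parser HeaderLevel
headerMarker = do
    eqs <- Parsec.many1 (Parsec.char '=')
    if length eqs <= 6
        then pure (toEnum (length eqs - 1)) -- H1 .. H6 via the derived Enum instance
        else Parsec.parserFail "headers only go six levels deep"
```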

29 LICENSE Normal file
@@ -0,0 +1,29 @@
Copyright (c) 2025, Igor Ranieri


Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above
      copyright notice, this list of conditions and the following
      disclaimer in the documentation and/or other materials provided
      with the distribution.

    * Neither the name of the copyright holder nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

4 app/Main.hs Normal file
@@ -0,0 +1,4 @@
module Main where

main :: IO ()
main = putStrLn "Hello, Haskell!"

1 cabal.project.local Normal file
@@ -0,0 +1 @@
tests: True

55 haddock2.cabal Normal file
@@ -0,0 +1,55 @@
cabal-version: 3.4
name: haddock2
version: 0.1.0.0
license: BSD-3-Clause
license-file: LICENSE
author: Igor Ranieri
maintainer: igor@elland.me
build-type: Simple
extra-doc-files: CHANGELOG.md

common common
  ghc-options: -Wall
  default-extensions:
    BlockArguments
    DuplicateRecordFields
    NoFieldSelectors

executable haddock2
  import: common
  main-is: Main.hs
  build-depends: base >=4.20.1.0
  hs-source-dirs: app
  default-language: GHC2024

library haddock2-lib
  import: common
  build-depends:
    base >=4.20.1.0,
    parsec ^>=3.1.18.0,
    text ^>=2.1.2,

  hs-source-dirs: src
  -- cabal-gild: discover src
  exposed-modules:
    Identifier
    Lexer
    Parser
    Parser.Util
    ParserMonad
    Types

  default-language: GHC2024

test-suite haddock2-test
  import: common
  type: exitcode-stdio-1.0
  main-is: Spec.hs
  build-depends:
    base >=4.20.1.0,
    haddock2:{haddock2-lib},
    hspec ^>=2.11.0,
    text ^>=2.1.2,

  hs-source-dirs: test
  default-language: GHC2024

198 markup.md Normal file
@@ -0,0 +1,198 @@
# Haddock Markup Language Examples

## Text Formatting

### Emphasis
```
/emphasized text/
```
Single line only, no newlines allowed.

### Bold
```
__bold text__
```
Single line only, no newlines allowed.

### Monospace/Code
```
@monospace text@
```
Can span multiple lines.

## Links and References

### Module Links
```
"Module.Name"
"Module.Name#anchor"
"Module.Name\#anchor"
```

### Hyperlinks
```
<http://example.com>
<http://example.com label text>
```
Auto-detected URLs:
```
http://example.com
https://example.com
ftp://example.com
```

### Markdown-style Links
```
[link text](http://example.com)
[link text]("Module.Name")
```

### Anchors
```
#anchor-name#
```
No spaces allowed in anchor names.

### Identifiers
```
'identifier'
```
Links to Haskell identifiers.

## Images

### Basic Images
```
<<image.png>>
<<image.png title text>>
```

### Markdown Images
```

```

## Math

### Inline Math
```
\(mathematical expression\)
```
Single line only.

### Display Math
```
\[mathematical expression\]
```
Can span multiple lines.

## Code and Examples

### Code Blocks
```
@
code block content
with multiple lines
@
```

### Bird Tracks (Code)
```
> code line 1
> code line 2
```
Each line starts with `>` followed by optional space.

### Examples
```
>>> expression
result line 1
result line 2

>>> another expression
result
```

### Properties
```
prop> property description
```

## Lists

### Unordered Lists
```
* item 1
* item 2
    continued content

- item 1
- item 2
```

### Ordered Lists
```
1. item 1
2. item 2

(1) item 1
(2) item 2
```

### Definition Lists
```
[term] definition content
[another term] more definition content
```
Optional colon after closing bracket.

## Tables

### Grid Tables
```
+----------+----------+
| Header 1 | Header 2 |
+==========+==========+
| Cell 1   | Cell 2   |
+----------+----------+
| Cell 3   | Cell 4   |
+----------+----------+
```
- First row determines table width
- Header separator uses `=` characters
- Regular separators use `-`
- Edges can be `+` or `|`

## Headers
```
= Level 1 Header
== Level 2 Header
=== Level 3 Header
==== Level 4 Header
===== Level 5 Header
====== Level 6 Header
```
Up to 6 levels deep.

## Special Elements

### Since Annotations
```
@since package-name-1.2.3
@since 1.2.3
```

### Numeric Character References
```
A (decimal)
A (hexadecimal)
A (hexadecimal)
```

## Escaping
Use backslash `\` to escape special characters. Trailing backslash without following character is treated as literal backslash.

## Structure
- Paragraphs separated by blank lines
- 4-space indentation for nested content
- Whitespace handling varies by context
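
Several of the constructs documented above already round-trip through the parser added in this commit. A GHCi sketch (run from `cabal repl haddock2-lib`); the two expected results mirror the assertions in test/Spec.hs, everything else about the session is an assumption of mine, not something this commit pins down:

```haskell
-- GHCi session sketch
ghci> :set -XOverloadedStrings
ghci> import Parser (parseText)
ghci> import Types
ghci> import Identifier (Identifier)
ghci> parseText "__bold__" :: DocMarkup () Identifier
DocBold (DocString "bold")
ghci> parseText "/emphasis/" :: DocMarkup () Identifier
DocEmphasis (DocString "emphasis")
```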

157 src/Identifier.hs Normal file
@@ -0,0 +1,157 @@
module Identifier (
    Identifier (..),
    Namespace (..),
    parseValid,
) where

import Data.Text qualified as Text
import Text.Parsec qualified as Parsec

import Control.Monad (guard)
import Data.Char (isAlpha, isAlphaNum)
import Data.Functor (($>))
import Data.Maybe (listToMaybe, maybeToList)
import Data.Text (Text)
import ParserMonad
import Text.Parsec (State (..))
import Text.Parsec.Pos (updatePosChar)
import Text.Read.Lex (isSymbolChar)

-- | Identifier string surrounded with namespace, opening, and closing quotes/backticks.
data Identifier = Identifier !Namespace !Char String !Char
    deriving (Show, Eq)

-- | The namespace qualification for an identifier.
data Namespace = Value | Type | None deriving (Eq, Ord, Enum, Show)

parseValid :: Parser Identifier
parseValid = do
    state@State{stateInput = input, statePos = pos} <- Parsec.getParserState

    case takeIdentifier input of
        Nothing -> Parsec.parserFail "parseValid: Failed to match a valid identifier"
        Just (namespace, op, ident, cl, input') ->
            let posOp = updatePosChar pos op
                posIdent = Text.foldl updatePosChar posOp ident
                posCl = updatePosChar posIdent cl
                newState = state{stateInput = input', statePos = posCl}
             in Parsec.setParserState newState $> Identifier namespace op (Text.unpack ident) cl

{- | Try to parse a delimited identifier off the front of the given input.

This tries to match as many valid Haskell identifiers/operators as possible,
to the point of sometimes accepting invalid things (ex: keywords). Some
considerations:

- operators and identifiers can have module qualifications
- operators can be wrapped in parens (for prefix)
- identifiers can be wrapped in backticks (for infix)
- delimiters are backticks or regular ticks
- since regular ticks are also valid in identifiers, we opt for the
  longest successful parse

This function should make /O(1)/ allocations
-}
takeIdentifier :: Text -> Maybe (Namespace, Char, Text, Char, Text)
takeIdentifier input = listToMaybe $ do
    -- Optional namespace
    let (namespace, input') = case Text.uncons input of
            Just ('v', i) -> (Value, i)
            Just ('t', i) -> (Type, i)
            _ -> (None, input)
    -- Opening tick
    (op, input'') <- maybeToList (Text.uncons input')
    guard (op == '\'' || op == '`')

    -- Identifier/operator
    (ident, input''') <- wrapped input''

    -- Closing tick
    (cl, input'''') <- maybeToList (Text.uncons input''')
    guard (cl == '\'' || cl == '`')

    pure (namespace, op, ident, cl, input'''')
  where
    -- \| Parse out a wrapped, possibly qualified, operator or identifier
    wrapped t = do
        (c, t') <- maybeToList (Text.uncons t)
        -- Tuples
        case c of
            '('
                | Just (c', _) <- Text.uncons t'
                , c' == ',' || c' == ')' ->
                    do
                        let (commas, t'') = Text.span (== ',') t'
                        (')', t''') <- maybeToList (Text.uncons t'')
                        pure (Text.take (Text.length commas + 2) t, t''')

            -- Parenthesized
            '(' -> do
                (n, t'') <- general False 0 [] t'
                (')', t''') <- maybeToList (Text.uncons t'')
                pure (Text.take (n + 2) t, t''')

            -- Backticked
            '`' -> do
                (n, t'') <- general False 0 [] t'
                ('`', t''') <- maybeToList (Text.uncons t'')
                pure (Text.take (n + 2) t, t''')

            -- Unadorned
            _ -> do
                (n, t'') <- general False 0 [] t
                pure (Text.take n t, t'')

    -- \| Parse out a possibly qualified operator or identifier
    general ::
        Bool ->
        -- \^ refuse inputs starting with operators
        Int ->
        -- \^ total characters \"consumed\" so far
        [(Int, Text)] ->
        -- \^ accumulated results
        Text ->
        -- \^ current input
        [(Int, Text)]
        -- \^ total characters parsed & what remains
    general !identOnly !i acc t
        -- Starts with an identifier (either just an identifier, or a module qual)
        | Just (n, rest) <- identLike t =
            if Text.null rest
                then acc
                else case Text.head rest of
                    '`' -> (n + i, rest) : acc
                    ')' -> (n + i, rest) : acc
                    '.' -> general False (n + i + 1) acc (Text.tail rest)
                    '\'' ->
                        let (m, rest') = quotes rest
                         in general True (n + m + 1 + i) ((n + m + i, rest') : acc) (Text.tail rest')
                    _ -> acc
        -- An operator
        | Just (n, rest) <- optr t
        , not identOnly =
            (n + i, rest) : acc
        -- Anything else
        | otherwise =
            acc

    -- \| Parse an identifier off the front of the input
    identLike t
        | Text.null t = Nothing
        | isAlpha (Text.head t) || '_' == Text.head t =
            let !(idt, rest) = Text.span (\c -> isAlphaNum c || c == '_') t
                !(octos, rest') = Text.span (== '#') rest
             in Just (Text.length idt + Text.length octos, rest')
        | otherwise = Nothing

    -- \| Parse all but the last quote off the front of the input
    -- PRECONDITION: T.head t `elem` ['\'', '`']
    quotes :: Text -> (Int, Text)
    quotes t =
        let !n = Text.length (Text.takeWhile (`elem` ['\'', '`']) t) - 1
         in (n, Text.drop n t)

    -- \| Parse an operator off the front of the input
    optr t =
        let !(op, rest) = Text.span isSymbolChar t
         in if Text.null op then Nothing else Just (Text.length op, rest)
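
A quick sketch of driving `parseValid` directly (not part of the commit); the expected result is my reading of `takeIdentifier`, not something the test suite asserts:

```haskell
import Data.Text qualified as Text
import Identifier (Identifier (..), Namespace (..), parseValid)
import ParserMonad (initialParserState)
import Text.Parsec qualified as Parsec

-- Run the identifier parser on a tick-delimited name.
demo :: Either Parsec.ParseError Identifier
demo = Parsec.runParser parseValid initialParserState "<demo>" (Text.pack "'foldMap'")
-- expected: Right (Identifier None '\'' "foldMap" '\'')
```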

190 src/Lexer.hs Normal file
@@ -0,0 +1,190 @@
{-# LANGUAGE OverloadedStrings #-}

module Lexer (
    Token (..),
    lexer,
    emphasis,
) where

import Control.Monad (mfilter)
import Data.Char (isAlphaNum, isPrint)
import Data.Functor (($>))
import Data.Text (Text)
import Data.Text qualified as Text
import GHC.Stack (HasCallStack)
import ParserMonad (Parser, initialParserState)
import Text.Parsec
import Text.Parsec qualified as Parsec
import Text.Parsec.Pos (updatePosChar)

type LocatedToken = (SourcePos, Token)

type Lexer = Parser [LocatedToken]

data Token
    = Token Text
    | Anchor
    | AngleOpen
    | AngleClose
    | BoldOpen
    | BoldClose
    | BracketOpen
    | BracketClose
    | EmphasisOpen
    | EmphasisClose
    | MonospaceOpen
    | MonospaceClose
    | Newline
    | ParenOpen
    | ParenClose
    | QuoteOpen
    | QuoteClose
    | Space
    | EOF
    deriving (Eq, Show)

lexer :: String -> Either ParseError [LocatedToken]
lexer = Parsec.runParser lexText initialParserState "input" . Text.pack

lexText :: (HasCallStack) => Parser [LocatedToken]
lexText = go
  where
    go = do
        Parsec.optionMaybe Parsec.eof >>= \case
            Just _ -> pure []
            Nothing -> do
                toks <-
                    choice
                        [ newlineToken
                        , spaceToken
                        , textElement
                        , identifier
                        , other
                        ]
                rest <- go
                pure (toks <> rest)

match :: Parser a -> Parser (Text, a)
match p = do
    input <- getInput
    result <- p
    input' <- getInput
    let !consumed = Text.take (Text.length input - Text.length input') input
    pure (consumed, result)

-- Tokens

textElement :: Parser [LocatedToken]
textElement =
    choice $
        Parsec.try
            <$> [ emphasis
                , bold
                , monospace
                , parens
                , brackets
                , angles
                ]

delimited :: String -> String -> Token -> Token -> Parser [LocatedToken]
delimited c1 c2 ot ct = do
    pos <- getPosition
    (_, content) <- match $ between op cl any'

    innerToks <- case lexer $ Text.unpack content of
        Left _ -> do
            pos' <- getPosition
            pure $ [(pos', Token content)]
        Right toks -> pure toks

    let openTok :: LocatedToken = (pos, ot)
        closeTok :: LocatedToken = (pos, ct)

    pure $ openTok : innerToks <> [closeTok]
  where
    op = string c1
    cl = string c2
    any' = Text.pack <$> manyTill anyChar (lookAhead cl)

delimited' :: String -> Token -> Token -> Parser [LocatedToken]
delimited' s t1 t2 = delimited s s t1 t2

emphasis :: Lexer
emphasis = delimited' "/" EmphasisOpen EmphasisClose

bold :: Lexer
bold = delimited' "__" BoldOpen BoldClose

monospace :: Lexer
monospace = delimited' "@" MonospaceOpen MonospaceClose

parens :: Parser [LocatedToken]
parens = delimited "(" ")" ParenOpen ParenClose

brackets :: Lexer
brackets = delimited "[" "]" BracketOpen BracketClose

angles :: Parser [LocatedToken]
angles = delimited "<" ">" AngleOpen AngleClose

other :: Lexer
other = do
    pos <- getPosition
    c <- takeWhile1_ isPrint
    pure . pure $ (pos, Token c)

spaceToken :: Lexer
spaceToken = do
    pos <- getPosition
    _ <- many1 (char ' ')
    pure . pure $ (pos, Space)

newlineToken :: Lexer
newlineToken = do
    pos <- getPosition
    _ <- newline
    pure . pure $ (pos, Newline)

identifier :: Lexer
identifier = do
    pos <- getPosition
    txt <- takeWhile1_ isAlphaNum
    pure . pure $ (pos, Token txt)

-------
-- Helpers
-------

-- | Like `takeWhile`, but unconditionally take escaped characters.
takeWhile_ :: (Char -> Bool) -> Parser Text
takeWhile_ p = scan p_ False
  where
    p_ escaped c
        | escaped = Just False
        | not $ p c = Nothing
        | otherwise = Just (c == '\\')

-- | Like 'takeWhile1', but unconditionally take escaped characters.
takeWhile1_ :: (Char -> Bool) -> Parser Text
takeWhile1_ = mfilter (not . Text.null) . takeWhile_

{- | Scan the input text, accumulating characters as long as the scanning
function returns true.
-}
scan ::
    -- | scan function
    (state -> Char -> Maybe state) ->
    -- | initial state
    state ->
    Parser Text
scan f initState = do
    parserState@State{stateInput = input, statePos = pos} <- Parsec.getParserState
    (remaining, finalPos, ct) <- go input initState pos 0
    let newState = parserState{stateInput = remaining, statePos = finalPos}
    Parsec.setParserState newState $> Text.take ct input
  where
    go !input' !st !posAccum !count' = case Text.uncons input' of
        Nothing -> pure (input', posAccum, count')
        Just (char', input'') -> case f st char' of
            Nothing -> pure (input', posAccum, count')
            Just st' -> go input'' st' (updatePosChar posAccum char') (count' + 1)
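
For a feel of what the lexer emits, a small sketch (not part of the commit); the token list is my expectation from reading the `choice` order in `lexText`, not a documented guarantee:

```haskell
import Lexer (Token (..), lexer)

demoTokens :: IO ()
demoTokens =
    -- lexer attaches a SourcePos to every token; drop it with `map snd` for readability.
    print (map snd <$> lexer "hello world")
-- expected: Right [Token "hello", Space, Token "world"]
```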

228 src/Parser.hs Normal file
@@ -0,0 +1,228 @@
{-# LANGUAGE OverloadedStrings #-}

module Parser (
    parse,
    parseText,
) where

import Identifier
import ParserMonad
import Types

import Control.Applicative
import Control.Monad
import Data.Char qualified as Char
import Data.Functor (($>))
import Data.List (intercalate)
import Data.Text (Text)
import Data.Text qualified as Text
import Parser.Util
import Text.Parsec qualified as Parsec

parse :: Parser a -> Text -> (ParserState, a)
parse parser = either err id . parse' (parser <* Parsec.eof)
  where
    err = error . ("Haddock.Parser.parse: " ++)

--------------------
-- Markup
--------------------

{- | Skips a single special character and treats it as a plain string.
This is done to skip over any special characters belonging to other
elements but which were not deemed meaningful at their positions.
-}
skipSpecialChar :: Parser (DocMarkup mod a)
skipSpecialChar = DocString . pure <$> Parsec.oneOf specialChar

{- | Plain, regular parser for text. Called as one of the last parsers
to ensure that we have already given a chance to more meaningful parsers
before capturing their characters.
-}
string' :: Parser (DocMarkup mod a)
string' =
    DocString
        -- After the first character, stop for @\(@ or @\[@ math starters. (The
        -- first character won't start a valid math string because this parser
        -- should follow math parsers. But this parser is expected to accept at
        -- least one character from all inputs that don't start with special
        -- characters, so the first character parser can't have the @"(["@
        -- restriction.)
        <$> ((:) <$> rawOrEscChar "" <*> many (rawOrEscChar "(["))
  where
    -- \| Parse a single logical character, either raw or escaped. Don't accept
    -- escaped characters from the argument string.
    rawOrEscChar :: [Char] -> Parser Char
    rawOrEscChar restrictedEscapes =
        Parsec.try $
            Parsec.noneOf specialChar >>= \case
                -- Handle backslashes:
                -- - Fail on forbidden escape characters.
                -- - Non-forbidden characters: simply unescape, e.g. parse "\b" as 'b',
                -- - Trailing backslash: treat it as a raw backslash, not an escape
                --   sequence. (This is the logic that this parser followed when this
                --   comment was written; it is not necessarily intentional but now I
                --   don't want to break anything relying on it.)
                '\\' -> Parsec.noneOf restrictedEscapes <|> Parsec.eof $> '\\'
                c -> pure c

{- | Emphasis parser.

>>> parseString "/Hello world/"
DocEmphasis (DocString "Hello world")
-}
emphasis :: Parser (DocMarkup mod Identifier)
emphasis =
    DocEmphasis . parseAll
        <$> disallowNewline ("/" *> takeWhile1_ (/= '/') <* "/")

{- | Bold parser.

>>> parseString "__Hello world__"
DocBold (DocString "Hello world")
-}
bold :: Parser (DocMarkup mod Identifier)
bold = DocBold . parseAll <$> disallowNewline ("__" *> takeUntil "__")

{- | Monospaced strings.

>>> parseString "@cruel@"
DocMonospaced (DocString "cruel")
-}
monospace :: Parser (DocMarkup mod Identifier)
monospace =
    DocMonospace . parseAll
        <$> ("@" *> takeWhile1_ (/= '@') <* "@")

{- | Text anchors to allow for jumping around the generated documentation.

>>> parseString "#Hello world#"
DocAName "Hello world"
-}
anchor :: Parser (DocMarkup mod a)
anchor =
    DocAnchor . Text.unpack
        <$> ("#" *> takeWhile1_ (\x -> x /= '#' && not (Char.isSpace x)) <* "#")

-- | Parses identifiers with help of 'parseValid'.
identifier :: Parser (DocMarkup mod Identifier)
identifier = DocIdentifier <$> parseValid

{- | Module names.

Note that we allow '#' and '\' to support anchors (old style anchors are of
the form "SomeModule\#anchor").
-}
moduleName :: Parser (DocMarkup mod a)
moduleName = DocModule . flip ModuleLink Nothing <$> ("\"" *> moduleNameString <* "\"")

-- | A module name, optionally with an anchor
moduleNameString :: Parser String
moduleNameString = moduleId `maybeFollowedBy` anchor_
  where
    moduleId = intercalate "." <$> conid `Parsec.sepBy1` "."
    anchor_ =
        (++)
            <$> (Parsec.string "#" <|> Parsec.string "\\#")
            <*> many (Parsec.satisfy (\c -> c /= '"' && not (Char.isSpace c)))

    maybeFollowedBy pre suf = (\x -> maybe x (x ++)) <$> pre <*> optional suf

    conid =
        (:)
            <$> Parsec.satisfy (\c -> Char.isAlpha c && Char.isUpper c)
            <*> many conChar

    conChar = Parsec.alphaNum <|> Parsec.char '_'

------------------------
-- Markup components
------------------------

{- | List of characters that we use to delimit any special markup.
Once we have checked for any of these and tried to parse the
relevant markup, we can assume they are used as regular text.
-}
specialChar :: [Char]
specialChar = "_/<@\"&'`#[ "

------------------------
-- Helpers
------------------------

parse' :: Parser a -> Text -> Either String (ParserState, a)
parse' parser t =
    let parser' = (,) <$> parser <*> Parsec.getState
     in case Parsec.runParser parser' initialParserState "<haddock>" t of
            Left e -> Left (show e)
            Right (x, s) -> Right (s, x)

docConcat :: [DocMarkup mod id] -> DocMarkup mod id
docConcat = foldr docAppend DocEmpty
  where
    -- Prevent doc append from becoming too nested
    docAppend (DocDefinitionList ds1) (DocDefinitionList ds2) = DocDefinitionList (ds1 <> ds2)
    docAppend (DocDefinitionList ds1) (DocAppend (DocDefinitionList ds2) d) = DocAppend (DocDefinitionList (ds1 <> ds2)) d
    docAppend (DocOrderedList ds1) (DocOrderedList ds2) = DocOrderedList (ds1 <> ds2)
    docAppend (DocOrderedList ds1) (DocAppend (DocOrderedList ds2) d) = DocAppend (DocOrderedList (ds1 <> ds2)) d
    docAppend (DocUnorderedList ds1) (DocUnorderedList ds2) = DocUnorderedList (ds1 <> ds2)
    docAppend (DocUnorderedList ds1) (DocAppend (DocUnorderedList ds2) d) = DocAppend (DocUnorderedList (ds1 <> ds2)) d
    docAppend DocEmpty d = d
    docAppend d DocEmpty = d
    docAppend (DocString s1) (DocString s2) = DocString (s1 <> s2)
    docAppend (DocAppend d (DocString s1)) (DocString s2) = DocAppend d (DocString (s1 <> s2))
    docAppend (DocString s1) (DocAppend (DocString s2) d) = DocAppend (DocString (s1 <> s2)) d
    docAppend d1 d2 = DocAppend d1 d2

{- | Parse a text paragraph. Actually just a wrapper over 'parseAll' which
drops leading whitespace.
-}
parseText :: Text -> DocMarkup mod Identifier
parseText = parseAll . Text.dropWhile Char.isSpace . Text.filter (/= '\r')

parseAll :: Text -> DocMarkup mod Identifier
parseAll = snd . parse myParser
  where
    -- docConcat
    --     <$> many
    --         ( choice'
    --             [ monospace
    --             , anchor
    --             , identifier
    --             , moduleName
    --             , picture
    --             , mathDisplay
    --             , mathInline
    --             , markdownImage
    --             , markdownLink
    --             , hyperlink
    --             , bold
    --             , emphasis
    --             , encodedChar
    --             , string'
    --             , skipSpecialChar
    --             ]
    --         )
    myParser :: Parser (DocMarkup mod Identifier)
    myParser =
        docConcat
            <$> many
                ( choice'
                    [ monospace
                    , anchor
                    , identifier
                    , moduleName
                    , bold
                    , emphasis
                    , string'
                    , skipSpecialChar
                    ]
                )

choice' :: [Parser a] -> Parser a
choice' [] = empty
choice' [p] = p
choice' (p : ps) = Parsec.try p <|> choice' ps

disallowNewline :: Parser Text -> Parser Text
disallowNewline = mfilter (Text.all (/= '\n'))
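
The `docAppend` cases above are easiest to read as rewrite rules. A few concrete instances, written as equational notes rather than runnable calls since `docConcat` is internal to the module:

```haskell
-- Inside Parser.hs, docConcat = foldr docAppend DocEmpty, and docAppend
-- collapses neighbours instead of always nesting them in DocAppend:
--
--   docConcat [DocString "foo", DocString "bar"]
--     == DocString "foobar"
--   docConcat [DocUnorderedList [a], DocUnorderedList [b]]
--     == DocUnorderedList [a, b]
--   docConcat [DocEmpty, d]
--     == d
```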

65 src/Parser/Util.hs Normal file
@@ -0,0 +1,65 @@
module Parser.Util where

import Control.Monad (mfilter)
import Data.Functor (($>))
import Data.Text (Text)
import Data.Text qualified as Text
import ParserMonad (Parser)
import Text.Parsec (State (..))
import Text.Parsec qualified as Parsec
import Text.Parsec.Pos (updatePosChar)

{- | Consume characters from the input up to and including the given pattern.
Return everything consumed except for the end pattern itself.
-}
takeUntil :: Text -> Parser Text
takeUntil end_ = Text.dropEnd (Text.length end_) <$> requireEnd (scan p (False, end)) >>= gotSome
  where
    end = Text.unpack end_

    p :: (Bool, String) -> Char -> Maybe (Bool, String)
    p acc c = case acc of
        (True, _) -> Just (False, end)
        (_, []) -> Nothing
        (_, x : xs) | x == c -> Just (False, xs)
        _ -> Just (c == '\\', end)

    requireEnd = mfilter (Text.isSuffixOf end_)

    gotSome xs
        | Text.null xs = fail "didn't get any content"
        | otherwise = return xs

-- | Like `takeWhile`, but unconditionally take escaped characters.
takeWhile_ :: (Char -> Bool) -> Parser Text
takeWhile_ p = scan p_ False
  where
    p_ escaped c
        | escaped = Just False
        | not $ p c = Nothing
        | otherwise = Just (c == '\\')

-- | Like 'takeWhile1', but unconditionally take escaped characters.
takeWhile1_ :: (Char -> Bool) -> Parser Text
takeWhile1_ = mfilter (not . Text.null) . takeWhile_

{- | Scan the input text, accumulating characters as long as the scanning
function returns true.
-}
scan ::
    -- | scan function
    (state -> Char -> Maybe state) ->
    -- | initial state
    state ->
    Parser Text
scan f initState = do
    parserState@State{stateInput = input, statePos = pos} <- Parsec.getParserState
    (remaining, finalPos, ct) <- go input initState pos 0
    let newState = parserState{stateInput = remaining, statePos = finalPos}
    Parsec.setParserState newState $> Text.take ct input
  where
    go !input' !st !posAccum !count' = case Text.uncons input' of
        Nothing -> pure (input', posAccum, count')
        Just (char', input'') -> case f st char' of
            Nothing -> pure (input', posAccum, count')
            Just st' -> go input'' st' (updatePosChar posAccum char') (count' + 1)
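
A hedged example of what `takeUntil` is after, based on my trace of the scanner state machine rather than an asserted behaviour of this commit:

```haskell
import Data.Text qualified as Text
import Parser.Util (takeUntil)
import ParserMonad (initialParserState)
import Text.Parsec qualified as Parsec

-- Consume up to and including "__", returning everything before it.
untilDemo :: Either Parsec.ParseError Text.Text
untilDemo =
    Parsec.runParser (takeUntil (Text.pack "__")) initialParserState "<demo>" (Text.pack "foo__ rest")
-- expected: Right "foo"; a backslash-escaped delimiter should not end the span early.
```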

26 src/ParserMonad.hs Normal file
@@ -0,0 +1,26 @@
module ParserMonad where

import Data.String
import Data.Text (Text)
import Data.Text qualified as Text
import Text.Parsec (Parsec)
import Text.Parsec qualified as Parsec

import Types

type Parser = Parsec Text ParserState

instance (a ~ Text) => IsString (Parser a) where
    fromString = fmap Text.pack . Parsec.string

{- | The only bit of information we really care about trudging along with us
through parsing is the version attached to a @\@since@ annotation - if
the doc even contained one.
-}
newtype ParserState = ParserState
    { since :: Maybe Since
    }
    deriving (Eq, Show)

initialParserState :: ParserState
initialParserState = ParserState Nothing

196 src/Types.hs Normal file
@@ -0,0 +1,196 @@
module Types (
    DocMarkup (..),
    Document (..),
    Meta (..),
    ModuleLink (..),
    Package,
    Since (..),
    Version,
)
where

newtype Document = Document
    { meta :: Meta
    }
    deriving (Eq, Show)

newtype Meta = Meta
    { since :: Maybe Since
    }
    deriving (Eq, Show)

data Since = Since
    { package :: Maybe Package
    -- ^ optional package qualification
    , version :: Version
    }
    deriving (Eq, Show)

-- Could have a better type?
type Version = [Int]
type Package = String

data DocMarkup mod id
    = DocEmpty
    | -- | This is not represented in the markup language, this is for internal use
      DocAppend (DocMarkup mod id) (DocMarkup mod id)
    | -- | Any text that doesn't match any rules is a bare string
      DocString String
    | -- | Paragraphs are demarcated by blank lines
      DocParagraph (DocMarkup mod id)
    | -- | A haskell identifier
      DocIdentifier id
    | -- | A qualified identifier that couldn't be resolved.
      DocIdentifierUnchecked
    | -- | A link to a module, might include a label
      DocModule (ModuleLink (DocMarkup mod id))
    | -- | Emphasis /italics/
      DocEmphasis (DocMarkup mod id)
    | -- | Monospaced @source code@
      DocMonospace (DocMarkup mod id)
    | -- | Bold __bold text__
      DocBold (DocMarkup mod id)
    | {- | Unordered lists
        * this
        or
        - this
      -}
      DocUnorderedList [DocMarkup mod id]
    | {- | Ordered lists
        1. this
        or
        (1) this
      -}
      DocOrderedList [(Int, DocMarkup mod id)]
    | {- | Definition lists
        [term] a term
        [another term] another definition
      -}
      DocDefinitionList [(DocMarkup mod id, DocMarkup mod id)]
    | {- | Code blocks
        @
        a code block in here
        with multiple lines
        @

        Or with bird tracks:
        > some code
        > goes here
      -}
      DocCodeBlock (DocMarkup mod id)
    | {- | Hyperlinks
        __marked__:
        <http://example.com>
        <http://example.com label text>
        __Auto-detected URLs__:
        http://example.com
        https://example.com
        ftp://example.com
        __Markdown style__
        [link text](http://example.com)
        [link text]("Module.Name")
      -}
      DocHyperlink (Hyperlink (DocMarkup mod id))
    | {- | Pictures
        <<image.png>>
        <<image.png title text>>

        __Markdown Images__

        
      -}
      DocPicture Picture
    | {- | Inline math expressions
        \(mathematical expression\)
      -}
      DocMathInline String
    | {- | Math multiline display
        \[
        mathematical expression
        in multiple lines
        \]
      -}
      DocMathDisplay String
    | {- | Anchors, no spaces allowed
        #anchor-name#
      -}
      DocAnchor String
    | {- | Property descriptions
        prop> property description
      -}
      DocProperty String
    | {- | Examples
        >>> expression
        result line 1
        result line 2
      -}
      DocExamples [Example]
    | -- | Header
      DocHeader (Header (DocMarkup mod id))
    | -- Table
      DocTable (Table (DocMarkup mod id))
    deriving (Eq, Show)

instance Semigroup (DocMarkup mod id) where
    (<>) = DocAppend

instance Monoid (DocMarkup mod id) where
    mempty = DocEmpty
    mconcat = foldr (<>) mempty

data ModuleLink id = ModuleLink
    { name :: String
    , label :: Maybe id
    }
    deriving (Eq, Show)

data Picture = Picture
    { uri :: String
    , title :: Maybe String
    }
    deriving (Eq, Show)

data Hyperlink id = Hyperlink
    { url :: String
    , label :: Maybe id
    }
    deriving (Eq, Show, Functor, Foldable, Traversable)

data TableCell id = TableCell
    { col :: Int
    , row :: Int
    , content :: id
    }
    deriving (Eq, Show, Functor, Foldable, Traversable)

newtype TableRow id = TableRow
    { rows :: [TableCell id]
    }
    deriving (Eq, Show, Functor, Foldable, Traversable)

data Table id = Table
    { headerRows :: [TableRow id]
    , bodyRows :: [TableRow id]
    }
    deriving (Eq, Show, Functor, Foldable, Traversable)

data Example = Example
    { exampleExpression :: String
    , exampleResult :: [String]
    }
    deriving (Eq, Show)

data Header id = Header
    { level :: HeaderLevel
    , title :: id
    }
    deriving (Eq, Show)

data HeaderLevel
    = H1
    | H2
    | H3
    | H4
    | H5
    | H6
    deriving (Eq, Show, Bounded, Enum)
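
The Semigroup/Monoid instances make it easy to assemble documents by hand. A small sketch (not part of the commit); it sticks to `DocMarkup` constructors, since those are what the module currently exports:

```haskell
import Types

-- (<>) is DocAppend and mempty is DocEmpty, per the instances above.
handBuilt :: DocMarkup () String
handBuilt =
    DocParagraph (DocString "Hello, " <> DocBold (DocString "world"))
        <> DocUnorderedList [DocString "item 1", DocString "item 2"]
```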

34 test/Spec.hs Normal file
@@ -0,0 +1,34 @@
{-# LANGUAGE OverloadedStrings #-}
{-# OPTIONS_GHC -Wno-orphans #-}

import Test.Hspec

import Data.String (IsString (..))
import Data.Text (Text)

import Identifier (Identifier)
import Lexer
import Parser
import Types

main :: IO ()
main = hspec $ do
    describe "Lexer" do
        it "lexes" do
            lexer "This is string" `shouldBe` undefined
    describe "Parser" do
        it "Bold" do
            "__bold__" `shouldParseTo` (DocBold (DocString "bold"))
        it "Emphasis" do
            "/emphasis/" `shouldParseTo` (DocEmphasis (DocString "emphasis"))

shouldParseTo :: Text -> DocMarkup mod Identifier -> Expectation
shouldParseTo input ast = parseText input `shouldBe` ast

type Doc id = DocMarkup () id

instance IsString (Doc String) where
    fromString = DocString

file :: IO String
file = readFile "test/markup.md"

89 test/markup.md Normal file
@@ -0,0 +1,89 @@
/emphasized text/

__bold text__

@monospace text@

"Module.Name"
"Module.Name#anchor"
"Module.Name\#anchor"

<http://example.com>
<http://example.com label text>

http://example.com
https://example.com
ftp://example.com

[link text](http://example.com)
[link text]("Module.Name")

#anchor-name#

'identifier'

<<image.png>>
<<image.png title text>>



\(mathematical expression\)
\[mathematical expression\]

@
code block content
with multiple lines
@

> code line 1
> code line 2

>>> expression
result line 1
result line 2

>>> another expression
result

prop> property description

* item 1
* item 2
    continued content

- item 1
- item 2

1. item 1
2. item 2

(1) item 1
(2) item 2

[term] definition content
[another term] more definition content

+----------+----------+
| Header 1 | Header 2 |
+==========+==========+
| Cell 1   | Cell 2   |
+----------+----------+
| Cell 3   | Cell 4   |
+----------+----------+

= Level 1 Header
== Level 2 Header
=== Level 3 Header
==== Level 4 Header
===== Level 5 Header
====== Level 6 Header

@since package-name-1.2.3
@since 1.2.3

A (decimal)
A (hexadecimal)
A (hexadecimal)


This \@escapes\@ at signs