This commit is contained in:
Igor Ranieri 2025-09-21 08:19:01 +02:00
commit c9f61c4e06
16 changed files with 1366 additions and 0 deletions

1
.gitignore vendored Normal file

@@ -0,0 +1 @@
dist-newstyle

5
CHANGELOG.md Normal file

@@ -0,0 +1,5 @@
# Revision history for haddock2
## 0.1.0.0 -- YYYY-mm-dd
* First version. Released on an unsuspecting world.

88
Grammar.ebnf Normal file

@@ -0,0 +1,88 @@
document ::= paragraph*
paragraph ::= ( text_paragraph | code_block | header | list | table ) newline?
text_paragraph ::= text_element+
text_element ::= emphasis | bold | monospace | link | anchor | identifier |
inline_math | image | plain_text | escaped_char
emphasis ::= '/' text_no_newline '/'
bold ::= '__' text_no_newline '__'
monospace ::= '@' text_content '@'
link ::= module_link | hyperlink | markdown_link
module_link ::= '"' module_name ( '#' anchor_name )? '"'
hyperlink ::= '<' url ( ' ' link_text )? '>'
markdown_link ::= '[' link_text '](' ( url | module_link ) ')'
anchor ::= '#' anchor_name '#'
identifier ::= "'" haskell_id "'"
inline_math ::= '\(' math_expr '\)'
image ::= '<<' image_path ( ' ' image_title )? '>>' |
'![' alt_text '](' image_path ')'
code_block ::= at_block | bird_tracks | example_block | property_block
at_block ::= '@' newline code_content newline '@'
bird_tracks ::= ( '>' ' '? code_line newline )+
example_block ::= ( '>>>' ' ' expression newline result_line* )+
property_block ::= 'prop>' ' ' property_desc newline
header ::= header_marker ' ' header_text newline
header_marker ::= '=' | '==' | '===' | '====' | '=====' | '======'
list ::= unordered_list | ordered_list | definition_list
unordered_list ::= ( list_marker ' ' list_content )+
list_marker ::= '*' | '-'
ordered_list ::= ( number_marker ' ' list_content )+
number_marker ::= digit+ '.' | '(' digit+ ')'
definition_list ::= ( '[' term ']' ':'? ' ' definition_content )+
table ::= table_border header_row header_sep data_row* table_border
table_border ::= '+' ( '-' | '+' )* newline
header_row ::= '|' ( table_cell '|' )* newline
header_sep ::= '+' ( '=' | '+' )* newline
data_row ::= '|' ( table_cell '|' )* newline
plain_text ::= text_char+
text_char ::= letter | digit | ' ' | punctuation
text_no_newline ::= ( letter | digit | ' ' | safe_punctuation )+
text_content ::= ( letter | digit | ' ' | newline | punctuation )*
code_content ::= code_char*
code_char ::= letter | digit | ' ' | newline | punctuation
code_line ::= ( letter | digit | ' ' | punctuation )*
list_content ::= text_element* ( newline ' ' text_element* )?
table_cell ::= ' ' table_char* ' '
table_char ::= letter | digit | ' ' | safe_punctuation
header_text ::= ( letter | digit | ' ' | punctuation )+
link_text ::= link_char+
link_char ::= letter | digit | ' ' | safe_punctuation
alt_text ::= alt_char+
alt_char ::= letter | digit | ' ' | safe_punctuation
image_path ::= ( letter | digit | '.' | '/' | '-' | '_' )+
image_title ::= image_title_char+
image_title_char ::= letter | digit | ' ' | safe_punctuation
anchor_name ::= letter ( letter | digit | '-' )*
haskell_id ::= letter ( letter | digit | '_' | "'" )*
module_name ::= haskell_id ( '.' haskell_id )*
url ::= ( 'http://' | 'https://' | 'ftp://' ) url_char+
url_char ::= letter | digit | '/' | '.' | ':' | '-' | '_' | '?' | '&' | '='
math_expr ::= math_char+
math_char ::= letter | digit | ' ' | math_punctuation
property_desc ::= ( letter | digit | ' ' | punctuation )+
expression ::= ( letter | digit | ' ' | punctuation )+
result_line ::= result_char* newline
result_char ::= letter | digit | ' ' | safe_punctuation
term ::= term_char+
term_char ::= letter | digit | ' ' | safe_punctuation
definition_content ::= text_element+
escaped_char ::= '\' special_char
special_char ::= '/' | '*' | '@' | "'" | '"' | '#' | '<' | '>' | '[' | ']' | '(' | ')' | '|' | '=' | '-' | '+'
safe_punctuation ::= '!' | '$' | '%' | '^' | '&' | '(' | ')' | '_' | '+' | '{' | '}' | ';' | ':' | ',' | '.' | '`' | '~'
math_punctuation ::= '+' | '-' | '*' | '/' | '^' | '_' | '=' | '(' | ')' | '{' | '}' | '[' | ']'
punctuation ::= '!' | '@' | '#' | '$' | '%' | '^' | '&' | '*' | '(' | ')' | '-' | '_' | '+' | '=' | '{' | '}' | '[' | ']' | '|' | '\' | ':' | ';' | '"' | "'" | '<' | '>' | '?' | ',' | '.' | '/' | '`' | '~'
letter ::= 'a' | 'b' | 'c' | 'd' | 'e' | 'f' | 'g' | 'h' | 'i' | 'j' | 'k' | 'l' | 'm' | 'n' | 'o' | 'p' | 'q' | 'r' | 's' | 't' | 'u' | 'v' | 'w' | 'x' | 'y' | 'z' | 'A' | 'B' | 'C' | 'D' | 'E' | 'F' | 'G' | 'H' | 'I' | 'J' | 'K' | 'L' | 'M' | 'N' | 'O' | 'P' | 'Q' | 'R' | 'S' | 'T' | 'U' | 'V' | 'W' | 'X' | 'Y' | 'Z'
digit ::= '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9'
newline ::= #xA | #xD #xA | #xD

29
LICENSE Normal file

@@ -0,0 +1,29 @@
Copyright (c) 2025, Igor Ranieri
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials provided
with the distribution.
* Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

4
app/Main.hs Normal file

@@ -0,0 +1,4 @@
module Main where
main :: IO ()
main = putStrLn "Hello, Haskell!"

1
cabal.project.local Normal file

@@ -0,0 +1 @@
tests: True

55
haddock2.cabal Normal file

@@ -0,0 +1,55 @@
cabal-version: 3.4
name: haddock2
version: 0.1.0.0
license: BSD-3-Clause
license-file: LICENSE
author: Igor Ranieri
maintainer: igor@elland.me
build-type: Simple
extra-doc-files: CHANGELOG.md
common common
ghc-options: -Wall
default-extensions:
BlockArguments
DuplicateRecordFields
NoFieldSelectors
executable haddock2
import: common
main-is: Main.hs
build-depends: base >=4.20.1.0
hs-source-dirs: app
default-language: GHC2024
library haddock2-lib
import: common
build-depends:
base >=4.20.1.0,
parsec ^>=3.1.18.0,
text ^>=2.1.2,
hs-source-dirs: src
-- cabal-gild: discover src
exposed-modules:
Identifier
Lexer
Parser
Parser.Util
ParserMonad
Types
default-language: GHC2024
test-suite haddock2-test
import: common
type: exitcode-stdio-1.0
main-is: Spec.hs
build-depends:
base >=4.20.1.0,
haddock2:{haddock2-lib},
hspec ^>=2.11.0,
text ^>=2.1.2,
hs-source-dirs: test
default-language: GHC2024

198
markup.md Normal file

@@ -0,0 +1,198 @@
# Haddock Markup Language Examples
## Text Formatting
### Emphasis
```
/emphasized text/
```
Single line only, no newlines allowed.
### Bold
```
__bold text__
```
Single line only, no newlines allowed.
### Monospace/Code
```
@monospace text@
```
Can span multiple lines.
## Links and References
### Module Links
```
"Module.Name"
"Module.Name#anchor"
"Module.Name\#anchor"
```
### Hyperlinks
```
<http://example.com>
<http://example.com label text>
```
Auto-detected URLs:
```
http://example.com
https://example.com
ftp://example.com
```
### Markdown-style Links
```
[link text](http://example.com)
[link text]("Module.Name")
```
### Anchors
```
#anchor-name#
```
No spaces allowed in anchor names.
### Identifiers
```
'identifier'
```
Links to Haskell identifiers.
## Images
### Basic Images
```
<<image.png>>
<<image.png title text>>
```
### Markdown Images
```
![alt text](image.png)
```
## Math
### Inline Math
```
\(mathematical expression\)
```
Single line only.
### Display Math
```
\[mathematical expression\]
```
Can span multiple lines.
## Code and Examples
### Code Blocks
```
@
code block content
with multiple lines
@
```
### Bird Tracks (Code)
```
> code line 1
> code line 2
```
Each line starts with `>` followed by optional space.
### Examples
```
>>> expression
result line 1
result line 2
>>> another expression
result
```
### Properties
```
prop> property description
```
## Lists
### Unordered Lists
```
* item 1
* item 2
continued content
- item 1
- item 2
```
### Ordered Lists
```
1. item 1
2. item 2
(1) item 1
(2) item 2
```
### Definition Lists
```
[term] definition content
[another term] more definition content
```
Optional colon after closing bracket.
## Tables
### Grid Tables
```
+----------+----------+
| Header 1 | Header 2 |
+==========+==========+
| Cell 1 | Cell 2 |
+----------+----------+
| Cell 3 | Cell 4 |
+----------+----------+
```
- First row determines table width
- Header separator uses `=` characters
- Regular separators use `-`
- Edges can be `+` or `|`
## Headers
```
= Level 1 Header
== Level 2 Header
=== Level 3 Header
==== Level 4 Header
===== Level 5 Header
====== Level 6 Header
```
Up to 6 levels deep.
## Special Elements
### Since Annotations
```
@since package-name-1.2.3
@since 1.2.3
```
### Numeric Character References
```
&#65; (decimal)
&#x41; (hexadecimal)
&#X41; (hexadecimal)
```
## Escaping
Use a backslash `\` to escape special characters. A trailing backslash with no character after it is treated as a literal backslash.
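For example (illustrative inputs, assumed to follow the rules above):
```
\/not emphasis\/ and \@not monospace\@
a line ending in a lone backslash \
```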
## Structure
- Paragraphs separated by blank lines
- 4-space indentation for nested content (see the example below)
- Whitespace handling varies by context
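A sketch of nested content (an assumed illustration of the 4-space rule):
```
* outer item

    nested paragraph belonging to the outer item

    > nested code line
```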

157
src/Identifier.hs Normal file

@@ -0,0 +1,157 @@
module Identifier (
Identifier (..),
Namespace (..),
parseValid,
) where
import Data.Text qualified as Text
import Text.Parsec qualified as Parsec
import Control.Monad (guard)
import Data.Char (isAlpha, isAlphaNum)
import Data.Functor (($>))
import Data.Maybe (listToMaybe, maybeToList)
import Data.Text (Text)
import ParserMonad
import Text.Parsec (State (..))
import Text.Parsec.Pos (updatePosChar)
import Text.Read.Lex (isSymbolChar)
-- | An identifier string together with its namespace and its opening and closing quote/backtick delimiters.
data Identifier = Identifier !Namespace !Char String !Char
deriving (Show, Eq)
-- | The namespace qualification for an identifier.
data Namespace = Value | Type | None deriving (Eq, Ord, Enum, Show)
parseValid :: Parser Identifier
parseValid = do
state@State{stateInput = input, statePos = pos} <- Parsec.getParserState
case takeIdentifier input of
Nothing -> Parsec.parserFail "parseValid: Failed to match a valid identifier"
Just (namespace, op, ident, cl, input') ->
let posOp = updatePosChar pos op
posIdent = Text.foldl updatePosChar posOp ident
posCl = updatePosChar posIdent cl
newState = state{stateInput = input', statePos = posCl}
in Parsec.setParserState newState $> Identifier namespace op (Text.unpack ident) cl
{- | Try to parse a delimited identifier off the front of the given input.
This tries to match as many valid Haskell identifiers/operators as possible,
to the point of sometimes accepting invalid things (ex: keywords). Some
considerations:
- operators and identifiers can have module qualifications
- operators can be wrapped in parens (for prefix)
- identifiers can be wrapped in backticks (for infix)
- delimiters are backticks or regular ticks
- since regular ticks are also valid in identifiers, we opt for the
longest successful parse
This function should make /O(1)/ allocations
-}
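-- Hand-derived examples of the assumed behaviour (not taken from a test suite):
--
--   takeIdentifier "'foldr' xs" == Just (None, '\'', "foldr", '\'', " xs")
--   takeIdentifier "t'Maybe'"   == Just (Type, '\'', "Maybe", '\'', "")
--   takeIdentifier "'(<>)'"     == Just (None, '\'', "(<>)", '\'', "")
--   takeIdentifier "`div`"      == Just (None, '`', "div", '`', "")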
takeIdentifier :: Text -> Maybe (Namespace, Char, Text, Char, Text)
takeIdentifier input = listToMaybe $ do
-- Optional namespace
let (namespace, input') = case Text.uncons input of
Just ('v', i) -> (Value, i)
Just ('t', i) -> (Type, i)
_ -> (None, input)
-- Opening tick
(op, input'') <- maybeToList (Text.uncons input')
guard (op == '\'' || op == '`')
-- Identifier/operator
(ident, input''') <- wrapped input''
-- Closing tick
(cl, input'''') <- maybeToList (Text.uncons input''')
guard (cl == '\'' || cl == '`')
pure (namespace, op, ident, cl, input'''')
where
-- \| Parse out a wrapped, possibly qualified, operator or identifier
wrapped t = do
(c, t') <- maybeToList (Text.uncons t)
-- Tuples
case c of
'('
| Just (c', _) <- Text.uncons t'
, c' == ',' || c' == ')' ->
do
let (commas, t'') = Text.span (== ',') t'
(')', t''') <- maybeToList (Text.uncons t'')
pure (Text.take (Text.length commas + 2) t, t''')
-- Parenthesized
'(' -> do
(n, t'') <- general False 0 [] t'
(')', t''') <- maybeToList (Text.uncons t'')
pure (Text.take (n + 2) t, t''')
-- Backticked
'`' -> do
(n, t'') <- general False 0 [] t'
('`', t''') <- maybeToList (Text.uncons t'')
pure (Text.take (n + 2) t, t''')
-- Unadorned
_ -> do
(n, t'') <- general False 0 [] t
pure (Text.take n t, t'')
-- \| Parse out a possibly qualified operator or identifier
general ::
Bool ->
-- \^ refuse inputs starting with operators
Int ->
-- \^ total characters \"consumed\" so far
[(Int, Text)] ->
-- \^ accumulated results
Text ->
-- \^ current input
[(Int, Text)]
-- \^ total characters parsed & what remains
general !identOnly !i acc t
-- Starts with an identifier (either just an identifier, or a module qual)
| Just (n, rest) <- identLike t =
if Text.null rest
then acc
else case Text.head rest of
'`' -> (n + i, rest) : acc
')' -> (n + i, rest) : acc
'.' -> general False (n + i + 1) acc (Text.tail rest)
'\'' ->
let (m, rest') = quotes rest
in general True (n + m + 1 + i) ((n + m + i, rest') : acc) (Text.tail rest')
_ -> acc
-- An operator
| Just (n, rest) <- optr t
, not identOnly =
(n + i, rest) : acc
-- Anything else
| otherwise =
acc
-- \| Parse an identifier off the front of the input
identLike t
| Text.null t = Nothing
| isAlpha (Text.head t) || '_' == Text.head t =
let !(idt, rest) = Text.span (\c -> isAlphaNum c || c == '_') t
!(octos, rest') = Text.span (== '#') rest
in Just (Text.length idt + Text.length octos, rest')
| otherwise = Nothing
-- \| Parse all but the last quote off the front of the input
-- PRECONDITION: T.head t `elem` ['\'', '`']
quotes :: Text -> (Int, Text)
quotes t =
let !n = Text.length (Text.takeWhile (`elem` ['\'', '`']) t) - 1
in (n, Text.drop n t)
-- \| Parse an operator off the front of the input
optr t =
let !(op, rest) = Text.span isSymbolChar t
in if Text.null op then Nothing else Just (Text.length op, rest)

190
src/Lexer.hs Normal file

@@ -0,0 +1,190 @@
{-# LANGUAGE OverloadedStrings #-}
module Lexer (
Token (..),
lexer,
emphasis,
) where
import Control.Monad (mfilter)
import Data.Char (isAlphaNum, isPrint)
import Data.Functor (($>))
import Data.Text (Text)
import Data.Text qualified as Text
import GHC.Stack (HasCallStack)
import ParserMonad (Parser, initialParserState)
import Text.Parsec
import Text.Parsec qualified as Parsec
import Text.Parsec.Pos (updatePosChar)
type LocatedToken = (SourcePos, Token)
type Lexer = Parser [LocatedToken]
data Token
= Token Text
| Anchor
| AngleOpen
| AngleClose
| BoldOpen
| BoldClose
| BracketOpen
| BracketClose
| EmphasisOpen
| EmphasisClose
| MonospaceOpen
| MonospaceClose
| Newline
| ParenOpen
| ParenClose
| QuoteOpen
| QuoteClose
| Space
| EOF
deriving (Eq, Show)
lexer :: String -> Either ParseError [LocatedToken]
lexer = Parsec.runParser lexText initialParserState "input" . Text.pack
lexText :: (HasCallStack) => Parser [LocatedToken]
lexText = go
where
go = do
Parsec.optionMaybe Parsec.eof >>= \case
Just _ -> pure []
Nothing -> do
toks <-
choice
[ newlineToken
, spaceToken
, textElement
, identifier
, other
]
rest <- go
pure (toks <> rest)
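-- | Run a parser and also return the exact slice of input it consumed.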
match :: Parser a -> Parser (Text, a)
match p = do
input <- getInput
result <- p
input' <- getInput
let !consumed = Text.take (Text.length input - Text.length input') input
pure (consumed, result)
-- Tokens
textElement :: Parser [LocatedToken]
textElement =
choice $
Parsec.try
<$> [ emphasis
, bold
, monospace
, parens
, brackets
, angles
]
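-- | Lex a region between two delimiter strings: emit the opening token,
-- re-lex the inner text (falling back to a single 'Token' if that fails),
-- then emit the closing token.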
delimited :: String -> String -> Token -> Token -> Parser [LocatedToken]
delimited c1 c2 ot ct = do
pos <- getPosition
(_, content) <- match $ between op cl any'
innerToks <- case lexer $ Text.unpack content of
Left _ -> do
pos' <- getPosition
pure $ [(pos', Token content)]
Right toks -> pure toks
let openTok :: LocatedToken = (pos, ot)
closeTok :: LocatedToken = (pos, ct)
pure $ openTok : innerToks <> [closeTok]
where
op = string c1
cl = string c2
any' = Text.pack <$> manyTill anyChar (lookAhead cl)
delimited' :: String -> Token -> Token -> Parser [LocatedToken]
delimited' s t1 t2 = delimited s s t1 t2
emphasis :: Lexer
emphasis = delimited' "/" EmphasisOpen EmphasisClose
bold :: Lexer
bold = delimited' "__" BoldOpen BoldClose
monospace :: Lexer
monospace = delimited' "@" MonospaceOpen MonospaceClose
parens :: Parser [LocatedToken]
parens = delimited "(" ")" ParenOpen ParenClose
brackets :: Lexer
brackets = delimited "[" "]" BracketOpen BracketClose
angles :: Parser [LocatedToken]
angles = delimited "<" ">" AngleOpen AngleClose
other :: Lexer
other = do
pos <- getPosition
c <- takeWhile1_ isPrint
pure . pure $ (pos, Token c)
spaceToken :: Lexer
spaceToken = do
pos <- getPosition
_ <- many1 (char ' ')
pure . pure $ (pos, Space)
newlineToken :: Lexer
newlineToken = do
pos <- getPosition
_ <- newline
pure . pure $ (pos, Newline)
identifier :: Lexer
identifier = do
pos <- getPosition
txt <- takeWhile1_ isAlphaNum
pure . pure $ (pos, Token txt)
-------
-- Helpers
-------
-- | Like `takeWhile`, but unconditionally take escaped characters.
takeWhile_ :: (Char -> Bool) -> Parser Text
takeWhile_ p = scan p_ False
where
p_ escaped c
| escaped = Just False
| not $ p c = Nothing
| otherwise = Just (c == '\\')
-- | Like 'takeWhile1', but unconditionally take escaped characters.
takeWhile1_ :: (Char -> Bool) -> Parser Text
takeWhile1_ = mfilter (not . Text.null) . takeWhile_
{- | Scan the input text, accumulating characters as long as the scanning
function returns a new state ('Just').
-}
scan ::
-- | scan function
(state -> Char -> Maybe state) ->
-- | initial state
state ->
Parser Text
scan f initState = do
parserState@State{stateInput = input, statePos = pos} <- Parsec.getParserState
(remaining, finalPos, ct) <- go input initState pos 0
let newState = parserState{stateInput = remaining, statePos = finalPos}
Parsec.setParserState newState $> Text.take ct input
where
go !input' !st !posAccum !count' = case Text.uncons input' of
Nothing -> pure (input', posAccum, count')
Just (char', input'') -> case f st char' of
Nothing -> pure (input', posAccum, count')
Just st' -> go input'' st' (updatePosChar posAccum char') (count' + 1)

228
src/Parser.hs Normal file

@@ -0,0 +1,228 @@
{-# LANGUAGE OverloadedStrings #-}
module Parser (
parse,
parseText,
) where
import Identifier
import ParserMonad
import Types
import Control.Applicative
import Control.Monad
import Data.Char qualified as Char
import Data.Functor (($>))
import Data.List (intercalate)
import Data.Text (Text)
import Data.Text qualified as Text
import Parser.Util
import Text.Parsec qualified as Parsec
parse :: Parser a -> Text -> (ParserState, a)
parse parser = either err id . parse' (parser <* Parsec.eof)
where
err = error . ("Haddock.Parser.parse: " ++)
--------------------
-- Markup
--------------------
{- | Skips a single special character and treats it as a plain string.
This is done to skip over any special characters belonging to other
elements but which were not deemed meaningful at their positions.
-}
skipSpecialChar :: Parser (DocMarkup mod a)
skipSpecialChar = DocString . pure <$> Parsec.oneOf specialChar
{- | Plain, regular parser for text. Called as one of the last parsers
to ensure that we have already given a chance to more meaningful parsers
before capturing their characters.
-}
string' :: Parser (DocMarkup mod a)
string' =
DocString
-- After the first character, stop for @\(@ or @\[@ math starters. (The
-- first character won't start a valid math string because this parser
-- should follow math parsers. But this parser is expected to accept at
-- least one character from all inputs that don't start with special
-- characters, so the first character parser can't have the @"(["@
-- restriction.)
<$> ((:) <$> rawOrEscChar "" <*> many (rawOrEscChar "(["))
where
-- \| Parse a single logical character, either raw or escaped. Don't accept
-- escaped characters from the argument string.
rawOrEscChar :: [Char] -> Parser Char
rawOrEscChar restrictedEscapes =
Parsec.try $
Parsec.noneOf specialChar >>= \case
-- Handle backslashes:
-- - Fail on forbidden escape characters.
-- - Non-forbidden characters: simply unescape, e.g. parse "\b" as 'b',
-- - Trailing backslash: treat it as a raw backslash, not an escape
-- sequence. (This is the logic that this parser followed when this
-- comment was written; it is not necessarily intentional but now I
-- don't want to break anything relying on it.)
'\\' -> Parsec.noneOf restrictedEscapes <|> Parsec.eof $> '\\'
c -> pure c
{- | Emphasis parser.
>>> parseText "/Hello world/"
DocEmphasis (DocString "Hello world")
-}
emphasis :: Parser (DocMarkup mod Identifier)
emphasis =
DocEmphasis . parseAll
<$> disallowNewline ("/" *> takeWhile1_ (/= '/') <* "/")
{- | Bold parser.
>>> parseText "__Hello world__"
DocBold (DocString "Hello world")
-}
bold :: Parser (DocMarkup mod Identifier)
bold = DocBold . parseAll <$> disallowNewline ("__" *> takeUntil "__")
{- | Monospaced strings.
>>> parseText "@cruel@"
DocMonospace (DocString "cruel")
-}
monospace :: Parser (DocMarkup mod Identifier)
monospace =
DocMonospace . parseAll
<$> ("@" *> takeWhile1_ (/= '@') <* "@")
{- | Text anchors to allow for jumping around the generated documentation.
>>> parseText "#anchor-name#"
DocAnchor "anchor-name"
-}
anchor :: Parser (DocMarkup mod a)
anchor =
DocAnchor . Text.unpack
<$> ("#" *> takeWhile1_ (\x -> x /= '#' && not (Char.isSpace x)) <* "#")
-- | Parses identifiers with help of 'parseValid'.
identifier :: Parser (DocMarkup mod Identifier)
identifier = DocIdentifier <$> parseValid
{- | Module names.
Note that we allow '#' and '\' to support anchors (old style anchors are of
the form "SomeModule\#anchor").
-}
moduleName :: Parser (DocMarkup mod a)
moduleName = DocModule . flip ModuleLink Nothing <$> ("\"" *> moduleNameString <* "\"")
-- | A module name, optionally with an anchor
moduleNameString :: Parser String
moduleNameString = moduleId `maybeFollowedBy` anchor_
where
moduleId = intercalate "." <$> conid `Parsec.sepBy1` "."
anchor_ =
(++)
<$> (Parsec.string "#" <|> Parsec.string "\\#")
<*> many (Parsec.satisfy (\c -> c /= '"' && not (Char.isSpace c)))
maybeFollowedBy pre suf = (\x -> maybe x (x ++)) <$> pre <*> optional suf
conid =
(:)
<$> Parsec.satisfy (\c -> Char.isAlpha c && Char.isUpper c)
<*> many conChar
conChar = Parsec.alphaNum <|> Parsec.char '_'
------------------------
-- Markup components
------------------------
{- | List of characters that we use to delimit any special markup.
Once we have checked for any of these and tried to parse the
relevant markup, we can assume they are used as regular text.
-}
specialChar :: [Char]
specialChar = "_/<@\"&'`#[ "
------------------------
-- Helpers
------------------------
parse' :: Parser a -> Text -> Either String (ParserState, a)
parse' parser t =
let parser' = (,) <$> parser <*> Parsec.getState
in case Parsec.runParser parser' initialParserState "<haddock>" t of
Left e -> Left (show e)
Right (x, s) -> Right (s, x)
docConcat :: [DocMarkup mod id] -> DocMarkup mod id
docConcat = foldr docAppend DocEmpty
where
-- Prevent doc append from becoming too nested
docAppend (DocDefinitionList ds1) (DocDefinitionList ds2) = DocDefinitionList (ds1 <> ds2)
docAppend (DocDefinitionList ds1) (DocAppend (DocDefinitionList ds2) d) = DocAppend (DocDefinitionList (ds1 <> ds2)) d
docAppend (DocOrderedList ds1) (DocOrderedList ds2) = DocOrderedList (ds1 <> ds2)
docAppend (DocOrderedList ds1) (DocAppend (DocOrderedList ds2) d) = DocAppend (DocOrderedList (ds1 <> ds2)) d
docAppend (DocUnorderedList ds1) (DocUnorderedList ds2) = DocUnorderedList (ds1 <> ds2)
docAppend (DocUnorderedList ds1) (DocAppend (DocUnorderedList ds2) d) = DocAppend (DocUnorderedList (ds1 <> ds2)) d
docAppend DocEmpty d = d
docAppend d DocEmpty = d
docAppend (DocString s1) (DocString s2) = DocString (s1 <> s2)
docAppend (DocAppend d (DocString s1)) (DocString s2) = DocAppend d (DocString (s1 <> s2))
docAppend (DocString s1) (DocAppend (DocString s2) d) = DocAppend (DocString (s1 <> s2)) d
docAppend d1 d2 = DocAppend d1 d2
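-- A hand-derived example of the flattening above:
--   docConcat [DocString "foo", DocString "bar", DocEmpty] == DocString "foobar"
-- rather than a right-nested chain of 'DocAppend' constructors.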
{- | Parse a text paragraph. Actually just a wrapper over 'parseAll' which
drops leading whitespace.
-}
parseText :: Text -> DocMarkup mod Identifier
parseText = parseAll . Text.dropWhile Char.isSpace . Text.filter (/= '\r')
parseAll :: Text -> DocMarkup mod Identifier
parseAll = snd . parse myParser
where
-- docConcat
-- <$> many
-- ( choice'
-- [ monospace
-- , anchor
-- , identifier
-- , moduleName
-- , picture
-- , mathDisplay
-- , mathInline
-- , markdownImage
-- , markdownLink
-- , hyperlink
-- , bold
-- , emphasis
-- , encodedChar
-- , string'
-- , skipSpecialChar
-- ]
-- )
myParser :: Parser (DocMarkup mod Identifier)
myParser =
docConcat
<$> many
( choice'
[ monospace
, anchor
, identifier
, moduleName
, bold
, emphasis
, string'
, skipSpecialChar
]
)
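-- | Backtracking choice: like 'Parsec.choice', but wraps every alternative
-- except the last in 'Parsec.try', so a failing alternative does not commit input.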
choice' :: [Parser a] -> Parser a
choice' [] = empty
choice' [p] = p
choice' (p : ps) = Parsec.try p <|> choice' ps
disallowNewline :: Parser Text -> Parser Text
disallowNewline = mfilter (Text.all (/= '\n'))

65
src/Parser/Util.hs Normal file

@@ -0,0 +1,65 @@
module Parser.Util where
import Control.Monad (mfilter)
import Data.Functor (($>))
import Data.Text (Text)
import Data.Text qualified as Text
import ParserMonad (Parser)
import Text.Parsec (State (..))
import Text.Parsec qualified as Parsec
import Text.Parsec.Pos (updatePosChar)
{- | Consume characters from the input up to and including the given pattern.
Return everything consumed except for the end pattern itself.
-}
takeUntil :: Text -> Parser Text
takeUntil end_ = Text.dropEnd (Text.length end_) <$> requireEnd (scan p (False, end)) >>= gotSome
where
end = Text.unpack end_
p :: (Bool, String) -> Char -> Maybe (Bool, String)
p acc c = case acc of
(True, _) -> Just (False, end)
(_, []) -> Nothing
(_, x : xs) | x == c -> Just (False, xs)
_ -> Just (c == '\\', end)
requireEnd = mfilter (Text.isSuffixOf end_)
gotSome xs
| Text.null xs = fail "didn't get any content"
| otherwise = return xs
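-- Illustrative behaviour (hand-derived): on input "Hello world__ tail",
-- takeUntil "__" consumes "Hello world__", returns "Hello world", and
-- leaves " tail" unconsumed.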
-- | Like `takeWhile`, but unconditionally take escaped characters.
takeWhile_ :: (Char -> Bool) -> Parser Text
takeWhile_ p = scan p_ False
where
p_ escaped c
| escaped = Just False
| not $ p c = Nothing
| otherwise = Just (c == '\\')
-- | Like 'takeWhile1', but unconditionally take escaped characters.
takeWhile1_ :: (Char -> Bool) -> Parser Text
takeWhile1_ = mfilter (not . Text.null) . takeWhile_
{- | Scan the input text, accumulating characters as long as the scanning
function returns a new state ('Just').
-}
scan ::
-- | scan function
(state -> Char -> Maybe state) ->
-- | initial state
state ->
Parser Text
scan f initState = do
parserState@State{stateInput = input, statePos = pos} <- Parsec.getParserState
(remaining, finalPos, ct) <- go input initState pos 0
let newState = parserState{stateInput = remaining, statePos = finalPos}
Parsec.setParserState newState $> Text.take ct input
where
go !input' !st !posAccum !count' = case Text.uncons input' of
Nothing -> pure (input', posAccum, count')
Just (char', input'') -> case f st char' of
Nothing -> pure (input', posAccum, count')
Just st' -> go input'' st' (updatePosChar posAccum char') (count' + 1)

26
src/ParserMonad.hs Normal file

@@ -0,0 +1,26 @@
module ParserMonad where
import Data.String
import Data.Text (Text)
import Data.Text qualified as Text
import Text.Parsec (Parsec)
import Text.Parsec qualified as Parsec
import Types
type Parser = Parsec Text ParserState
instance (a ~ Text) => IsString (Parser a) where
fromString = fmap Text.pack . Parsec.string
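-- With OverloadedStrings enabled, this instance lets bare string literals act
-- as parsers, e.g. ("#" *> takeWhile1_ (/= '#') <* "#") in Parser.hs; each
-- literal is shorthand for fmap Text.pack (Parsec.string ...).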
{- | The only bit of information we really care about trudging along with us
through parsing is the version attached to a @\@since@ annotation - if
the doc even contained one.
-}
newtype ParserState = ParserState
{ since :: Maybe Since
}
deriving (Eq, Show)
initialParserState :: ParserState
initialParserState = ParserState Nothing

196
src/Types.hs Normal file

@@ -0,0 +1,196 @@
module Types (
DocMarkup (..),
Document (..),
Meta (..),
ModuleLink (..),
Package,
Since (..),
Version,
)
where
newtype Document = Document
{ meta :: Meta
}
deriving (Eq, Show)
newtype Meta = Meta
{ since :: Maybe Since
}
deriving (Eq, Show)
data Since = Since
{ package :: Maybe Package
-- ^ optional package qualification
, version :: Version
}
deriving (Eq, Show)
-- Could have a better type?
type Version = [Int]
type Package = String
data DocMarkup mod id
= DocEmpty
| -- | This is not represented in the markup language; it exists only for internal use
DocAppend (DocMarkup mod id) (DocMarkup mod id)
| -- | Any text that doesn't match any rules is a bare string
DocString String
| -- | Paragraphs are demarcated by blank lines
DocParagraph (DocMarkup mod id)
| -- | A haskell identifier
DocIdentifier id
| -- | A qualified identifier that couldn't be resolved.
DocIdentifierUnchecked
| -- | A link to a module, might include a label
DocModule (ModuleLink (DocMarkup mod id))
| -- | Emphasis /italics/
DocEmphasis (DocMarkup mod id)
| -- | Monospaced @source code@
DocMonospace (DocMarkup mod id)
| -- | Bold __bold text__
DocBold (DocMarkup mod id)
| {- | Unordered lists
* this
or
- this
-}
DocUnorderedList [DocMarkup mod id]
| {- | Ordered lists
1. this
or
(1) this
-}
DocOrderedList [(Int, DocMarkup mod id)]
| {- | Definition lists
[term] a term
[another term] another definition
-}
DocDefinitionList [(DocMarkup mod id, DocMarkup mod id)]
| {- | Code blocks
@
a code block in here
with multiple lines
@
Or with bird tracks:
> some code
> goes here
-}
DocCodeBlock (DocMarkup mod id)
| {- | Hyperlinks
__marked__:
<http://example.com>
<http://example.com label text>
__Auto-detected URLs__:
http://example.com
https://example.com
ftp://example.com
__Markdown style__
[link text](http://example.com)
[link text]("Module.Name")
-}
DocHyperlink (Hyperlink (DocMarkup mod id))
| {- | Pictures
<<image.png>>
<<image.png title text>>
__Markdown Images__
![alt text](image.png)
-}
DocPicture Picture
| {- | Inline math expressions
\(mathematical expression\)
-}
DocMathInline String
| {- | Math multiline display
\[
mathematical expression
in multiple lines
\]
-}
DocMathDisplay String
| {- | Anchors, no spaces allowed
#anchor-name#
-}
DocAnchor String
| {- | Property descriptions
prop> property description
-}
DocProperty String
| {- | Examples
>>> expression
result line 1
result line 2
-}
DocExamples [Example]
| -- | Header
DocHeader (Header (DocMarkup mod id))
| -- | Table
DocTable (Table (DocMarkup mod id))
deriving (Eq, Show)
instance Semigroup (DocMarkup mod id) where
(<>) = DocAppend
instance Monoid (DocMarkup mod id) where
mempty = DocEmpty
mconcat = foldr (<>) mempty
data ModuleLink id = ModuleLink
{ name :: String
, label :: Maybe id
}
deriving (Eq, Show)
data Picture = Picture
{ uri :: String
, title :: Maybe String
}
deriving (Eq, Show)
data Hyperlink id = Hyperlink
{ url :: String
, label :: Maybe id
}
deriving (Eq, Show, Functor, Foldable, Traversable)
data TableCell id = TableCell
{ col :: Int
, row :: Int
, content :: id
}
deriving (Eq, Show, Functor, Foldable, Traversable)
newtype TableRow id = TableRow
{ rows :: [TableCell id]
}
deriving (Eq, Show, Functor, Foldable, Traversable)
data Table id = Table
{ headerRows :: [TableRow id]
, bodyRows :: [TableRow id]
}
deriving (Eq, Show, Functor, Foldable, Traversable)
data Example = Example
{ exampleExpression :: String
, exampleResult :: [String]
}
deriving (Eq, Show)
data Header id = Header
{ level :: HeaderLevel
, title :: id
}
deriving (Eq, Show)
data HeaderLevel
= H1
| H2
| H3
| H4
| H5
| H6
deriving (Eq, Show, Bounded, Enum)

34
test/Spec.hs Normal file

@@ -0,0 +1,34 @@
{-# LANGUAGE OverloadedStrings #-}
{-# OPTIONS_GHC -Wno-orphans #-}
import Test.Hspec
import Data.String (IsString (..))
import Data.Text (Text)
import Identifier (Identifier)
import Lexer
import Parser
import Types
main :: IO ()
main = hspec $ do
describe "Lexer" do
it "lexes" do
lexer "This is string" `shouldBe` undefined
describe "Parser" do
it "Bold" do
"__bold__" `shouldParseTo` (DocBold (DocString "bold"))
it "Emphasis" do
"/emphasis/" `shouldParseTo` (DocEmphasis (DocString "emphasis"))
shouldParseTo :: Text -> DocMarkup mod Identifier -> Expectation
shouldParseTo input ast = parseText input `shouldBe` ast
type Doc id = DocMarkup () id
instance IsString (Doc String) where
fromString = DocString
file :: IO String
file = readFile "test/markup.md"

89
test/markup.md Normal file

@@ -0,0 +1,89 @@
/emphasized text/
__bold text__
@monospace text@
"Module.Name"
"Module.Name#anchor"
"Module.Name#anchor"
[http://example.com](http://example.com)
<[http://example.com](http://example.com) label text>
[http://example.com](http://example.com)
[https://example.com](https://example.com)
ftp\://example.com
[link text](http://example.com)
[link text]("Module.Name")
#anchor-name#
'identifier'
<<image.png>>
<<image.png title text>>
![alt text](image.png)
\(mathematical expression\)
\[mathematical expression\]
@
code block content
with multiple lines
@
> code line 1
> code line 2
>>> expression
result line 1
result line 2
>>> another expression
result
prop> property description
* item 1
* item 2
continued content
- item 1
- item 2
1. item 1
2. item 2
(1) item 1
(2) item 2
[term] definition content
[another term] more definition content
+----------+----------+
| Header 1 | Header 2 |
+==========+==========+
| Cell 1 | Cell 2 |
+----------+----------+
| Cell 3 | Cell 4 |
+----------+----------+
= Level 1 Header
== Level 2 Header
=== Level 3 Header
==== Level 4 Header
===== Level 5 Header
====== Level 6 Header
@since package-name-1.2.3
@since 1.2.3
&#65; (decimal)
&#x41; (hexadecimal)
&#X41; (hexadecimal)
This \@escapes\@ at signs