This commit is contained in:
Igor Ranieri 2025-09-21 08:19:01 +02:00
commit c9f61c4e06
16 changed files with 1366 additions and 0 deletions

1
.gitignore vendored Normal file

@@ -0,0 +1 @@
dist-newstyle

5
CHANGELOG.md Normal file

@@ -0,0 +1,5 @@
# Revision history for haddock2
## 0.1.0.0 -- YYYY-mm-dd
* First version. Released on an unsuspecting world.

88
Grammar.ebnf Normal file

@@ -0,0 +1,88 @@
document ::= paragraph*
paragraph ::= ( text_paragraph | code_block | header | list | table ) newline?
text_paragraph ::= text_element+
text_element ::= emphasis | bold | monospace | link | anchor | identifier |
inline_math | image | plain_text | escaped_char
emphasis ::= '/' text_no_newline '/'
bold ::= '__' text_no_newline '__'
monospace ::= '@' text_content '@'
link ::= module_link | hyperlink | markdown_link
module_link ::= '"' module_name ( '#' anchor_name )? '"'
hyperlink ::= '<' url ( ' ' link_text )? '>'
markdown_link ::= '[' link_text '](' ( url | module_link ) ')'
anchor ::= '#' anchor_name '#'
identifier ::= "'" haskell_id "'"
inline_math ::= '\(' math_expr '\)'
image ::= '<<' image_path ( ' ' image_title )? '>>' |
'![' alt_text '](' image_path ')'
code_block ::= at_block | bird_tracks | example_block | property_block
at_block ::= '@' newline code_content newline '@'
bird_tracks ::= ( '>' ' '? code_line newline )+
example_block ::= ( '>>>' ' ' expression newline result_line* )+
property_block ::= 'prop>' ' ' property_desc newline
header ::= header_marker ' ' header_text newline
header_marker ::= '=' | '==' | '===' | '====' | '=====' | '======'
list ::= unordered_list | ordered_list | definition_list
unordered_list ::= ( list_marker ' ' list_content )+
list_marker ::= '*' | '-'
ordered_list ::= ( number_marker ' ' list_content )+
number_marker ::= digit+ '.' | '(' digit+ ')'
definition_list ::= ( '[' term ']' ':'? ' ' definition_content )+
table ::= table_border header_row header_sep data_row* table_border
table_border ::= '+' ( '-' | '+' )* newline
header_row ::= '|' ( table_cell '|' )* newline
header_sep ::= '+' ( '=' | '+' )* newline
data_row ::= '|' ( table_cell '|' )* newline
plain_text ::= text_char+
text_char ::= letter | digit | ' ' | punctuation
text_no_newline ::= ( letter | digit | ' ' | safe_punctuation )+
text_content ::= ( letter | digit | ' ' | newline | punctuation )*
code_content ::= code_char*
code_char ::= letter | digit | ' ' | newline | punctuation
code_line ::= ( letter | digit | ' ' | punctuation )*
list_content ::= text_element* ( newline ' ' text_element* )?
table_cell ::= ' ' table_char* ' '
table_char ::= letter | digit | ' ' | safe_punctuation
header_text ::= ( letter | digit | ' ' | punctuation )+
link_text ::= link_char+
link_char ::= letter | digit | ' ' | safe_punctuation
alt_text ::= alt_char+
alt_char ::= letter | digit | ' ' | safe_punctuation
image_path ::= ( letter | digit | '.' | '/' | '-' | '_' )+
image_title ::= image_title_char+
image_title_char ::= letter | digit | ' ' | safe_punctuation
anchor_name ::= letter ( letter | digit | '-' )*
haskell_id ::= letter ( letter | digit | '_' | "'" )*
module_name ::= haskell_id ( '.' haskell_id )*
url ::= ( 'http://' | 'https://' | 'ftp://' ) url_char+
url_char ::= letter | digit | '/' | '.' | ':' | '-' | '_' | '?' | '&' | '='
math_expr ::= math_char+
math_char ::= letter | digit | ' ' | math_punctuation
property_desc ::= ( letter | digit | ' ' | punctuation )+
expression ::= ( letter | digit | ' ' | punctuation )+
result_line ::= result_char* newline
result_char ::= letter | digit | ' ' | safe_punctuation
term ::= term_char+
term_char ::= letter | digit | ' ' | safe_punctuation
definition_content ::= text_element+
escaped_char ::= '\' special_char
special_char ::= '/' | '*' | '@' | "'" | '"' | '#' | '<' | '>' | '[' | ']' | '(' | ')' | '|' | '=' | '-' | '+'
safe_punctuation ::= '!' | '$' | '%' | '^' | '&' | '(' | ')' | '_' | '+' | '{' | '}' | ';' | ':' | ',' | '.' | '`' | '~'
math_punctuation ::= '+' | '-' | '*' | '/' | '^' | '_' | '=' | '(' | ')' | '{' | '}' | '[' | ']'
punctuation ::= '!' | '@' | '#' | '$' | '%' | '^' | '&' | '*' | '(' | ')' | '-' | '_' | '+' | '=' | '{' | '}' | '[' | ']' | '|' | '\' | ':' | ';' | '"' | "'" | '<' | '>' | '?' | ',' | '.' | '/' | '`' | '~'
letter ::= 'a' | 'b' | 'c' | 'd' | 'e' | 'f' | 'g' | 'h' | 'i' | 'j' | 'k' | 'l' | 'm' | 'n' | 'o' | 'p' | 'q' | 'r' | 's' | 't' | 'u' | 'v' | 'w' | 'x' | 'y' | 'z' | 'A' | 'B' | 'C' | 'D' | 'E' | 'F' | 'G' | 'H' | 'I' | 'J' | 'K' | 'L' | 'M' | 'N' | 'O' | 'P' | 'Q' | 'R' | 'S' | 'T' | 'U' | 'V' | 'W' | 'X' | 'Y' | 'Z'
digit ::= '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9'
newline ::= #xA | #xD #xA | #xD

29
LICENSE Normal file

@@ -0,0 +1,29 @@
Copyright (c) 2025, Igor Ranieri
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials provided
with the distribution.
* Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

4
app/Main.hs Normal file

@@ -0,0 +1,4 @@
module Main where
main :: IO ()
main = putStrLn "Hello, Haskell!"

1
cabal.project.local Normal file

@@ -0,0 +1 @@
tests: True

55
haddock2.cabal Normal file

@@ -0,0 +1,55 @@
cabal-version: 3.4
name: haddock2
version: 0.1.0.0
license: BSD-3-Clause
license-file: LICENSE
author: Igor Ranieri
maintainer: igor@elland.me
build-type: Simple
extra-doc-files: CHANGELOG.md
common common
ghc-options: -Wall
default-extensions:
BlockArguments
DuplicateRecordFields
NoFieldSelectors
executable haddock2
import: common
main-is: Main.hs
build-depends: base >=4.20.1.0
hs-source-dirs: app
default-language: GHC2024
library haddock2-lib
import: common
build-depends:
base >=4.20.1.0,
parsec ^>=3.1.18.0,
text ^>=2.1.2,
hs-source-dirs: src
-- cabal-gild: discover src
exposed-modules:
Identifier
Lexer
Parser
Parser.Util
ParserMonad
Types
default-language: GHC2024
test-suite haddock2-test
import: common
type: exitcode-stdio-1.0
main-is: Spec.hs
build-depends:
base >=4.20.1.0,
haddock2:{haddock2-lib},
hspec ^>=2.11.0,
text ^>=2.1.2,
hs-source-dirs: test
default-language: GHC2024

198
markup.md Normal file

@@ -0,0 +1,198 @@
# Haddock Markup Language Examples
## Text Formatting
### Emphasis
```
/emphasized text/
```
Single line only, no newlines allowed.
### Bold
```
__bold text__
```
Single line only, no newlines allowed.
### Monospace/Code
```
@monospace text@
```
Can span multiple lines.
## Links and References
### Module Links
```
"Module.Name"
"Module.Name#anchor"
"Module.Name\#anchor"
```
### Hyperlinks
```
<http://example.com>
<http://example.com label text>
```
Auto-detected URLs:
```
http://example.com
https://example.com
ftp://example.com
```
### Markdown-style Links
```
[link text](http://example.com)
[link text]("Module.Name")
```
### Anchors
```
#anchor-name#
```
No spaces allowed in anchor names.
### Identifiers
```
'identifier'
```
Links to Haskell identifiers.
## Images
### Basic Images
```
<<image.png>>
<<image.png title text>>
```
### Markdown Images
```
![alt text](image.png)
```
## Math
### Inline Math
```
\(mathematical expression\)
```
Single line only.
### Display Math
```
\[mathematical expression\]
```
Can span multiple lines.
## Code and Examples
### Code Blocks
```
@
code block content
with multiple lines
@
```
### Bird Tracks (Code)
```
> code line 1
> code line 2
```
Each line starts with `>` followed by optional space.
### Examples
```
>>> expression
result line 1
result line 2
>>> another expression
result
```
### Properties
```
prop> property description
```
## Lists
### Unordered Lists
```
* item 1
* item 2
continued content
- item 1
- item 2
```
### Ordered Lists
```
1. item 1
2. item 2
(1) item 1
(2) item 2
```
### Definition Lists
```
[term] definition content
[another term] more definition content
```
Optional colon after closing bracket.
## Tables
### Grid Tables
```
+----------+----------+
| Header 1 | Header 2 |
+==========+==========+
| Cell 1 | Cell 2 |
+----------+----------+
| Cell 3 | Cell 4 |
+----------+----------+
```
- First row determines table width
- Header separator uses `=` characters
- Regular separators use `-`
- Edges can be `+` or `|`
## Headers
```
= Level 1 Header
== Level 2 Header
=== Level 3 Header
==== Level 4 Header
===== Level 5 Header
====== Level 6 Header
```
Up to 6 levels deep.
## Special Elements
### Since Annotations
```
@since package-name-1.2.3
@since 1.2.3
```
### Numeric Character References
```
&#65; (decimal)
&#x41; (hexadecimal)
&#X41; (hexadecimal)
```
## Escaping
Use a backslash `\` to escape special characters. A trailing backslash with no character after it is treated as a literal backslash.
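For example (illustrative inputs, assumed to follow the rules above):
```
\/not emphasis\/ and \@not monospace\@
a line ending in a lone backslash \
```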
## Structure
- Paragraphs separated by blank lines
- 4-space indentation for nested content (see the example below)
- Whitespace handling varies by context
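A sketch of nested content (an assumed illustration of the 4-space rule):
```
* outer item

    nested paragraph belonging to the outer item

    > nested code line
```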

157
src/Identifier.hs Normal file

@@ -0,0 +1,157 @@
module Identifier (
Identifier (..),
Namespace (..),
parseValid,
) where
import Data.Text qualified as Text
import Text.Parsec qualified as Parsec
import Control.Monad (guard)
import Data.Char (isAlpha, isAlphaNum)
import Data.Functor (($>))
import Data.Maybe (listToMaybe, maybeToList)
import Data.Text (Text)
import ParserMonad
import Text.Parsec (State (..))
import Text.Parsec.Pos (updatePosChar)
import Text.Read.Lex (isSymbolChar)
-- | An identifier string together with its namespace and its opening and closing quote/backtick delimiters.
data Identifier = Identifier !Namespace !Char String !Char
deriving (Show, Eq)
-- | The namespace qualification for an identifier.
data Namespace = Value | Type | None deriving (Eq, Ord, Enum, Show)
parseValid :: Parser Identifier
parseValid = do
state@State{stateInput = input, statePos = pos} <- Parsec.getParserState
case takeIdentifier input of
Nothing -> Parsec.parserFail "parseValid: Failed to match a valid identifier"
Just (namespace, op, ident, cl, input') ->
let posOp = updatePosChar pos op
posIdent = Text.foldl updatePosChar posOp ident
posCl = updatePosChar posIdent cl
newState = state{stateInput = input', statePos = posCl}
in Parsec.setParserState newState $> Identifier namespace op (Text.unpack ident) cl
{- | Try to parse a delimited identifier off the front of the given input.
This tries to match as many valid Haskell identifiers/operators as possible,
to the point of sometimes accepting invalid things (ex: keywords). Some
considerations:
- operators and identifiers can have module qualifications
- operators can be wrapped in parens (for prefix)
- identifiers can be wrapped in backticks (for infix)
- delimiters are backticks or regular ticks
- since regular ticks are also valid in identifiers, we opt for the
longest successful parse
This function should make /O(1)/ allocations
-}
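-- Hand-derived examples of the assumed behaviour (not taken from a test suite):
--
--   takeIdentifier "'foldr' xs" == Just (None, '\'', "foldr", '\'', " xs")
--   takeIdentifier "t'Maybe'"   == Just (Type, '\'', "Maybe", '\'', "")
--   takeIdentifier "'(<>)'"     == Just (None, '\'', "(<>)", '\'', "")
--   takeIdentifier "`div`"      == Just (None, '`', "div", '`', "")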
takeIdentifier :: Text -> Maybe (Namespace, Char, Text, Char, Text)
takeIdentifier input = listToMaybe $ do
-- Optional namespace
let (namespace, input') = case Text.uncons input of
Just ('v', i) -> (Value, i)
Just ('t', i) -> (Type, i)
_ -> (None, input)
-- Opening tick
(op, input'') <- maybeToList (Text.uncons input')
guard (op == '\'' || op == '`')
-- Identifier/operator
(ident, input''') <- wrapped input''
-- Closing tick
(cl, input'''') <- maybeToList (Text.uncons input''')
guard (cl == '\'' || cl == '`')
pure (namespace, op, ident, cl, input'''')
where
-- \| Parse out a wrapped, possibly qualified, operator or identifier
wrapped t = do
(c, t') <- maybeToList (Text.uncons t)
-- Tuples
case c of
'('
| Just (c', _) <- Text.uncons t'
, c' == ',' || c' == ')' ->
do
let (commas, t'') = Text.span (== ',') t'
(')', t''') <- maybeToList (Text.uncons t'')
pure (Text.take (Text.length commas + 2) t, t''')
-- Parenthesized
'(' -> do
(n, t'') <- general False 0 [] t'
(')', t''') <- maybeToList (Text.uncons t'')
pure (Text.take (n + 2) t, t''')
-- Backticked
'`' -> do
(n, t'') <- general False 0 [] t'
('`', t''') <- maybeToList (Text.uncons t'')
pure (Text.take (n + 2) t, t''')
-- Unadorned
_ -> do
(n, t'') <- general False 0 [] t
pure (Text.take n t, t'')
-- \| Parse out a possibly qualified operator or identifier
general ::
Bool ->
-- \^ refuse inputs starting with operators
Int ->
-- \^ total characters \"consumed\" so far
[(Int, Text)] ->
-- \^ accumulated results
Text ->
-- \^ current input
[(Int, Text)]
-- \^ total characters parsed & what remains
general !identOnly !i acc t
-- Starts with an identifier (either just an identifier, or a module qual)
| Just (n, rest) <- identLike t =
if Text.null rest
then acc
else case Text.head rest of
'`' -> (n + i, rest) : acc
')' -> (n + i, rest) : acc
'.' -> general False (n + i + 1) acc (Text.tail rest)
'\'' ->
let (m, rest') = quotes rest
in general True (n + m + 1 + i) ((n + m + i, rest') : acc) (Text.tail rest')
_ -> acc
-- An operator
| Just (n, rest) <- optr t
, not identOnly =
(n + i, rest) : acc
-- Anything else
| otherwise =
acc
-- \| Parse an identifier off the front of the input
identLike t
| Text.null t = Nothing
| isAlpha (Text.head t) || '_' == Text.head t =
let !(idt, rest) = Text.span (\c -> isAlphaNum c || c == '_') t
!(octos, rest') = Text.span (== '#') rest
in Just (Text.length idt + Text.length octos, rest')
| otherwise = Nothing
-- \| Parse all but the last quote off the front of the input
-- PRECONDITION: T.head t `elem` ['\'', '`']
quotes :: Text -> (Int, Text)
quotes t =
let !n = Text.length (Text.takeWhile (`elem` ['\'', '`']) t) - 1
in (n, Text.drop n t)
-- \| Parse an operator off the front of the input
optr t =
let !(op, rest) = Text.span isSymbolChar t
in if Text.null op then Nothing else Just (Text.length op, rest)

190
src/Lexer.hs Normal file

@@ -0,0 +1,190 @@
{-# LANGUAGE OverloadedStrings #-}
module Lexer (
Token (..),
lexer,
emphasis,
) where
import Control.Monad (mfilter)
import Data.Char (isAlphaNum, isPrint)
import Data.Functor (($>))
import Data.Text (Text)
import Data.Text qualified as Text
import GHC.Stack (HasCallStack)
import ParserMonad (Parser, initialParserState)
import Text.Parsec
import Text.Parsec qualified as Parsec
import Text.Parsec.Pos (updatePosChar)
type LocatedToken = (SourcePos, Token)
type Lexer = Parser [LocatedToken]
data Token
= Token Text
| Anchor
| AngleOpen
| AngleClose
| BoldOpen
| BoldClose
| BracketOpen
| BracketClose
| EmphasisOpen
| EmphasisClose
| MonospaceOpen
| MonospaceClose
| Newline
| ParenOpen
| ParenClose
| QuoteOpen
| QuoteClose
| Space
| EOF
deriving (Eq, Show)
lexer :: String -> Either ParseError [LocatedToken]
lexer = Parsec.runParser lexText initialParserState "input" . Text.pack
lexText :: (HasCallStack) => Parser [LocatedToken]
lexText = go
where
go = do
Parsec.optionMaybe Parsec.eof >>= \case
Just _ -> pure []
Nothing -> do
toks <-
choice
[ newlineToken
, spaceToken
, textElement
, identifier
, other
]
rest <- go
pure (toks <> rest)
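-- | Run a parser and also return the exact slice of input it consumed.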
match :: Parser a -> Parser (Text, a)
match p = do
input <- getInput
result <- p
input' <- getInput
let !consumed = Text.take (Text.length input - Text.length input') input
pure (consumed, result)
-- Tokens
textElement :: Parser [LocatedToken]
textElement =
choice $
Parsec.try
<$> [ emphasis
, bold
, monospace
, parens
, brackets
, angles
]
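-- | Lex a region between two delimiter strings: emit the opening token,
-- re-lex the inner text (falling back to a single 'Token' if that fails),
-- then emit the closing token.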
delimited :: String -> String -> Token -> Token -> Parser [LocatedToken]
delimited c1 c2 ot ct = do
pos <- getPosition
(_, content) <- match $ between op cl any'
innerToks <- case lexer $ Text.unpack content of
Left _ -> do
pos' <- getPosition
pure $ [(pos', Token content)]
Right toks -> pure toks
let openTok :: LocatedToken = (pos, ot)
closeTok :: LocatedToken = (pos, ct)
pure $ openTok : innerToks <> [closeTok]
where
op = string c1
cl = string c2
any' = Text.pack <$> manyTill anyChar (lookAhead cl)
delimited' :: String -> Token -> Token -> Parser [LocatedToken]
delimited' s t1 t2 = delimited s s t1 t2
emphasis :: Lexer
emphasis = delimited' "/" EmphasisOpen EmphasisClose
bold :: Lexer
bold = delimited' "__" BoldOpen BoldClose
monospace :: Lexer
monospace = delimited' "@" MonospaceOpen MonospaceClose
parens :: Parser [LocatedToken]
parens = delimited "(" ")" ParenOpen ParenClose
brackets :: Lexer
brackets = delimited "[" "]" BracketOpen BracketClose
angles :: Parser [LocatedToken]
angles = delimited "<" ">" AngleOpen AngleClose
other :: Lexer
other = do
pos <- getPosition
c <- takeWhile1_ isPrint
pure . pure $ (pos, Token c)
spaceToken :: Lexer
spaceToken = do
pos <- getPosition
_ <- many1 (char ' ')
pure . pure $ (pos, Space)
newlineToken :: Lexer
newlineToken = do
pos <- getPosition
_ <- newline
pure . pure $ (pos, Newline)
identifier :: Lexer
identifier = do
pos <- getPosition
txt <- takeWhile1_ isAlphaNum
pure . pure $ (pos, Token txt)
-------
-- Helpers
-------
-- | Like `takeWhile`, but unconditionally take escaped characters.
takeWhile_ :: (Char -> Bool) -> Parser Text
takeWhile_ p = scan p_ False
where
p_ escaped c
| escaped = Just False
| not $ p c = Nothing
| otherwise = Just (c == '\\')
-- | Like 'takeWhile1', but unconditionally take escaped characters.
takeWhile1_ :: (Char -> Bool) -> Parser Text
takeWhile1_ = mfilter (not . Text.null) . takeWhile_
{- | Scan the input text, accumulating characters as long as the scanning
function returns a new state ('Just').
-}
scan ::
-- | scan function
(state -> Char -> Maybe state) ->
-- | initial state
state ->
Parser Text
scan f initState = do
parserState@State{stateInput = input, statePos = pos} <- Parsec.getParserState
(remaining, finalPos, ct) <- go input initState pos 0
let newState = parserState{stateInput = remaining, statePos = finalPos}
Parsec.setParserState newState $> Text.take ct input
where
go !input' !st !posAccum !count' = case Text.uncons input' of
Nothing -> pure (input', posAccum, count')
Just (char', input'') -> case f st char' of
Nothing -> pure (input', posAccum, count')
Just st' -> go input'' st' (updatePosChar posAccum char') (count' + 1)

228
src/Parser.hs Normal file

@@ -0,0 +1,228 @@
{-# LANGUAGE OverloadedStrings #-}
module Parser (
parse,
parseText,
) where
import Identifier
import ParserMonad
import Types
import Control.Applicative
import Control.Monad
import Data.Char qualified as Char
import Data.Functor (($>))
import Data.List (intercalate)
import Data.Text (Text)
import Data.Text qualified as Text
import Parser.Util
import Text.Parsec qualified as Parsec
parse :: Parser a -> Text -> (ParserState, a)
parse parser = either err id . parse' (parser <* Parsec.eof)
where
err = error . ("Haddock.Parser.parse: " ++)
--------------------
-- Markup
--------------------
{- | Skips a single special character and treats it as a plain string.
This is done to skip over any special characters belonging to other
elements but which were not deemed meaningful at their positions.
-}
skipSpecialChar :: Parser (DocMarkup mod a)
skipSpecialChar = DocString . pure <$> Parsec.oneOf specialChar
{- | Plain, regular parser for text. Called as one of the last parsers
to ensure that we have already given a chance to more meaningful parsers
before capturing their characters.
-}
string' :: Parser (DocMarkup mod a)
string' =
DocString
-- After the first character, stop for @\(@ or @\[@ math starters. (The
-- first character won't start a valid math string because this parser
-- should follow math parsers. But this parser is expected to accept at
-- least one character from all inputs that don't start with special
-- characters, so the first character parser can't have the @"(["@
-- restriction.)
<$> ((:) <$> rawOrEscChar "" <*> many (rawOrEscChar "(["))
where
-- \| Parse a single logical character, either raw or escaped. Don't accept
-- escaped characters from the argument string.
rawOrEscChar :: [Char] -> Parser Char
rawOrEscChar restrictedEscapes =
Parsec.try $
Parsec.noneOf specialChar >>= \case
-- Handle backslashes:
-- - Fail on forbidden escape characters.
-- - Non-forbidden characters: simply unescape, e.g. parse "\b" as 'b',
-- - Trailing backslash: treat it as a raw backslash, not an escape
-- sequence. (This is the logic that this parser followed when this
-- comment was written; it is not necessarily intentional but now I
-- don't want to break anything relying on it.)
'\\' -> Parsec.noneOf restrictedEscapes <|> Parsec.eof $> '\\'
c -> pure c
{- | Emphasis parser.
>>> parseText "/Hello world/"
DocEmphasis (DocString "Hello world")
-}
emphasis :: Parser (DocMarkup mod Identifier)
emphasis =
DocEmphasis . parseAll
<$> disallowNewline ("/" *> takeWhile1_ (/= '/') <* "/")
{- | Bold parser.
>>> parseText "__Hello world__"
DocBold (DocString "Hello world")
-}
bold :: Parser (DocMarkup mod Identifier)
bold = DocBold . parseAll <$> disallowNewline ("__" *> takeUntil "__")
{- | Monospaced strings.
>>> parseText "@cruel@"
DocMonospace (DocString "cruel")
-}
monospace :: Parser (DocMarkup mod Identifier)
monospace =
DocMonospace . parseAll
<$> ("@" *> takeWhile1_ (/= '@') <* "@")
{- | Text anchors to allow for jumping around the generated documentation.
>>> parseText "#anchor-name#"
DocAnchor "anchor-name"
-}
anchor :: Parser (DocMarkup mod a)
anchor =
DocAnchor . Text.unpack
<$> ("#" *> takeWhile1_ (\x -> x /= '#' && not (Char.isSpace x)) <* "#")
-- | Parses identifiers with help of 'parseValid'.
identifier :: Parser (DocMarkup mod Identifier)
identifier = DocIdentifier <$> parseValid
{- | Module names.
Note that we allow '#' and '\' to support anchors (old style anchors are of
the form "SomeModule\#anchor").
-}
moduleName :: Parser (DocMarkup mod a)
moduleName = DocModule . flip ModuleLink Nothing <$> ("\"" *> moduleNameString <* "\"")
-- | A module name, optionally with an anchor
moduleNameString :: Parser String
moduleNameString = moduleId `maybeFollowedBy` anchor_
where
moduleId = intercalate "." <$> conid `Parsec.sepBy1` "."
anchor_ =
(++)
<$> (Parsec.string "#" <|> Parsec.string "\\#")
<*> many (Parsec.satisfy (\c -> c /= '"' && not (Char.isSpace c)))
maybeFollowedBy pre suf = (\x -> maybe x (x ++)) <$> pre <*> optional suf
conid =
(:)
<$> Parsec.satisfy (\c -> Char.isAlpha c && Char.isUpper c)
<*> many conChar
conChar = Parsec.alphaNum <|> Parsec.char '_'
------------------------
-- Markup components
------------------------
{- | List of characters that we use to delimit any special markup.
Once we have checked for any of these and tried to parse the
relevant markup, we can assume they are used as regular text.
-}
specialChar :: [Char]
specialChar = "_/<@\"&'`#[ "
------------------------
-- Helpers
------------------------
parse' :: Parser a -> Text -> Either String (ParserState, a)
parse' parser t =
let parser' = (,) <$> parser <*> Parsec.getState
in case Parsec.runParser parser' initialParserState "<haddock>" t of
Left e -> Left (show e)
Right (x, s) -> Right (s, x)
docConcat :: [DocMarkup mod id] -> DocMarkup mod id
docConcat = foldr docAppend DocEmpty
where
-- Prevent doc append from becoming too nested
docAppend (DocDefinitionList ds1) (DocDefinitionList ds2) = DocDefinitionList (ds1 <> ds2)
docAppend (DocDefinitionList ds1) (DocAppend (DocDefinitionList ds2) d) = DocAppend (DocDefinitionList (ds1 <> ds2)) d
docAppend (DocOrderedList ds1) (DocOrderedList ds2) = DocOrderedList (ds1 <> ds2)
docAppend (DocOrderedList ds1) (DocAppend (DocOrderedList ds2) d) = DocAppend (DocOrderedList (ds1 <> ds2)) d
docAppend (DocUnorderedList ds1) (DocUnorderedList ds2) = DocUnorderedList (ds1 <> ds2)
docAppend (DocUnorderedList ds1) (DocAppend (DocUnorderedList ds2) d) = DocAppend (DocUnorderedList (ds1 <> ds2)) d
docAppend DocEmpty d = d
docAppend d DocEmpty = d
docAppend (DocString s1) (DocString s2) = DocString (s1 <> s2)
docAppend (DocAppend d (DocString s1)) (DocString s2) = DocAppend d (DocString (s1 <> s2))
docAppend (DocString s1) (DocAppend (DocString s2) d) = DocAppend (DocString (s1 <> s2)) d
docAppend d1 d2 = DocAppend d1 d2
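-- A hand-derived example of the flattening above:
--   docConcat [DocString "foo", DocString "bar", DocEmpty] == DocString "foobar"
-- rather than a right-nested chain of 'DocAppend' constructors.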
{- | Parse a text paragraph. Actually just a wrapper over 'parseAll' which
drops leading whitespace.
-}
parseText :: Text -> DocMarkup mod Identifier
parseText = parseAll . Text.dropWhile Char.isSpace . Text.filter (/= '\r')
parseAll :: Text -> DocMarkup mod Identifier
parseAll = snd . parse myParser
where
-- docConcat
-- <$> many
-- ( choice'
-- [ monospace
-- , anchor
-- , identifier
-- , moduleName
-- , picture
-- , mathDisplay
-- , mathInline
-- , markdownImage
-- , markdownLink
-- , hyperlink
-- , bold
-- , emphasis
-- , encodedChar
-- , string'
-- , skipSpecialChar
-- ]
-- )
myParser :: Parser (DocMarkup mod Identifier)
myParser =
docConcat
<$> many
( choice'
[ monospace
, anchor
, identifier
, moduleName
, bold
, emphasis
, string'
, skipSpecialChar
]
)
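-- | Backtracking choice: like 'Parsec.choice', but wraps every alternative
-- except the last in 'Parsec.try', so a failing alternative does not commit input.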
choice' :: [Parser a] -> Parser a
choice' [] = empty
choice' [p] = p
choice' (p : ps) = Parsec.try p <|> choice' ps
disallowNewline :: Parser Text -> Parser Text
disallowNewline = mfilter (Text.all (/= '\n'))

65
src/Parser/Util.hs Normal file

@@ -0,0 +1,65 @@
module Parser.Util where
import Control.Monad (mfilter)
import Data.Functor (($>))
import Data.Text (Text)
import Data.Text qualified as Text
import ParserMonad (Parser)
import Text.Parsec (State (..))
import Text.Parsec qualified as Parsec
import Text.Parsec.Pos (updatePosChar)
{- | Consume characters from the input up to and including the given pattern.
Return everything consumed except for the end pattern itself.
-}
takeUntil :: Text -> Parser Text
takeUntil end_ = Text.dropEnd (Text.length end_) <$> requireEnd (scan p (False, end)) >>= gotSome
where
end = Text.unpack end_
p :: (Bool, String) -> Char -> Maybe (Bool, String)
p acc c = case acc of
(True, _) -> Just (False, end)
(_, []) -> Nothing
(_, x : xs) | x == c -> Just (False, xs)
_ -> Just (c == '\\', end)
requireEnd = mfilter (Text.isSuffixOf end_)
gotSome xs
| Text.null xs = fail "didn't get any content"
| otherwise = return xs
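-- Illustrative behaviour (hand-derived): on input "Hello world__ tail",
-- takeUntil "__" consumes "Hello world__", returns "Hello world", and
-- leaves " tail" unconsumed.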
-- | Like `takeWhile`, but unconditionally take escaped characters.
takeWhile_ :: (Char -> Bool) -> Parser Text
takeWhile_ p = scan p_ False
where
p_ escaped c
| escaped = Just False
| not $ p c = Nothing
| otherwise = Just (c == '\\')
-- | Like 'takeWhile1', but unconditionally take escaped characters.
takeWhile1_ :: (Char -> Bool) -> Parser Text
takeWhile1_ = mfilter (not . Text.null) . takeWhile_
{- | Scan the input text, accumulating characters as long as the scanning
function returns a new state ('Just').
-}
scan ::
-- | scan function
(state -> Char -> Maybe state) ->
-- | initial state
state ->
Parser Text
scan f initState = do
parserState@State{stateInput = input, statePos = pos} <- Parsec.getParserState
(remaining, finalPos, ct) <- go input initState pos 0
let newState = parserState{stateInput = remaining, statePos = finalPos}
Parsec.setParserState newState $> Text.take ct input
where
go !input' !st !posAccum !count' = case Text.uncons input' of
Nothing -> pure (input', posAccum, count')
Just (char', input'') -> case f st char' of
Nothing -> pure (input', posAccum, count')
Just st' -> go input'' st' (updatePosChar posAccum char') (count' + 1)

26
src/ParserMonad.hs Normal file

@@ -0,0 +1,26 @@
module ParserMonad where
import Data.String
import Data.Text (Text)
import Data.Text qualified as Text
import Text.Parsec (Parsec)
import Text.Parsec qualified as Parsec
import Types
type Parser = Parsec Text ParserState
instance (a ~ Text) => IsString (Parser a) where
fromString = fmap Text.pack . Parsec.string
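-- With OverloadedStrings enabled, this instance lets bare string literals act
-- as parsers, e.g. ("#" *> takeWhile1_ (/= '#') <* "#") in Parser.hs; each
-- literal is shorthand for fmap Text.pack (Parsec.string ...).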
{- | The only bit of information we really care about trudging along with us
through parsing is the version attached to a @\@since@ annotation - if
the doc even contained one.
-}
newtype ParserState = ParserState
{ since :: Maybe Since
}
deriving (Eq, Show)
initialParserState :: ParserState
initialParserState = ParserState Nothing

196
src/Types.hs Normal file

@@ -0,0 +1,196 @@
module Types (
DocMarkup (..),
Document (..),
Meta (..),
ModuleLink (..),
Package,
Since (..),
Version,
)
where
newtype Document = Document
{ meta :: Meta
}
deriving (Eq, Show)
newtype Meta = Meta
{ since :: Maybe Since
}
deriving (Eq, Show)
data Since = Since
{ package :: Maybe Package
-- ^ optional package qualification
, version :: Version
}
deriving (Eq, Show)
-- Could have a better type?
type Version = [Int]
type Package = String
data DocMarkup mod id
= DocEmpty
| -- | This is not represented in the markup language; it exists only for internal use
DocAppend (DocMarkup mod id) (DocMarkup mod id)
| -- | Any text that doesn't match any rules is a bare string
DocString String
| -- | Paragraphs are demarcated by blank lines
DocParagraph (DocMarkup mod id)
| -- | A haskell identifier
DocIdentifier id
| -- | A qualified identifier that couldn't be resolved.
DocIdentifierUnchecked
| -- | A link to a module, might include a label
DocModule (ModuleLink (DocMarkup mod id))
| -- | Emphasis /italics/
DocEmphasis (DocMarkup mod id)
| -- | Monospaced @source code@
DocMonospace (DocMarkup mod id)
| -- | Bold __bold text__
DocBold (DocMarkup mod id)
| {- | Unordered lists
* this
or
- this
-}
DocUnorderedList [DocMarkup mod id]
| {- | Ordered lists
1. this
or
(1) this
-}
DocOrderedList [(Int, DocMarkup mod id)]
| {- | Definition lists
[term] a term
[another term] another definition
-}
DocDefinitionList [(DocMarkup mod id, DocMarkup mod id)]
| {- | Code blocks
@
a code block in here
with multiple lines
@
Or with bird tracks:
> some code
> goes here
-}
DocCodeBlock (DocMarkup mod id)
| {- | Hyperlinks
__marked__:
<http://example.com>
<http://example.com label text>
__Auto-detected URLs__:
http://example.com
https://example.com
ftp://example.com
__Markdown style__
[link text](http://example.com)
[link text]("Module.Name")
-}
DocHyperlink (Hyperlink (DocMarkup mod id))
| {- | Pictures
<<image.png>>
<<image.png title text>>
__Markdown Images__
![alt text](image.png)
-}
DocPicture Picture
| {- | Inline math expressions
\(mathematical expression\)
-}
DocMathInline String
| {- | Math multiline display
\[
mathematical expression
in multiple lines
\]
-}
DocMathDisplay String
| {- | Anchors, no spaces allowed
#anchor-name#
-}
DocAnchor String
| {- | Property descriptions
prop> property description
-}
DocProperty String
| {- | Examples
>>> expression
result line 1
result line 2
-}
DocExamples [Example]
| -- | Header
DocHeader (Header (DocMarkup mod id))
| -- | Table
DocTable (Table (DocMarkup mod id))
deriving (Eq, Show)
instance Semigroup (DocMarkup mod id) where
(<>) = DocAppend
instance Monoid (DocMarkup mod id) where
mempty = DocEmpty
mconcat = foldr (<>) mempty
data ModuleLink id = ModuleLink
{ name :: String
, label :: Maybe id
}
deriving (Eq, Show)
data Picture = Picture
{ uri :: String
, title :: Maybe String
}
deriving (Eq, Show)
data Hyperlink id = Hyperlink
{ url :: String
, label :: Maybe id
}
deriving (Eq, Show, Functor, Foldable, Traversable)
data TableCell id = TableCell
{ col :: Int
, row :: Int
, content :: id
}
deriving (Eq, Show, Functor, Foldable, Traversable)
newtype TableRow id = TableRow
{ rows :: [TableCell id]
}
deriving (Eq, Show, Functor, Foldable, Traversable)
data Table id = Table
{ headerRows :: [TableRow id]
, bodyRows :: [TableRow id]
}
deriving (Eq, Show, Functor, Foldable, Traversable)
data Example = Example
{ exampleExpression :: String
, exampleResult :: [String]
}
deriving (Eq, Show)
data Header id = Header
{ level :: HeaderLevel
, title :: id
}
deriving (Eq, Show)
data HeaderLevel
= H1
| H2
| H3
| H4
| H5
| H6
deriving (Eq, Show, Bounded, Enum)

34
test/Spec.hs Normal file

@@ -0,0 +1,34 @@
{-# LANGUAGE OverloadedStrings #-}
{-# OPTIONS_GHC -Wno-orphans #-}
import Test.Hspec
import Data.String (IsString (..))
import Data.Text (Text)
import Identifier (Identifier)
import Lexer
import Parser
import Types
main :: IO ()
main = hspec $ do
describe "Lexer" do
it "lexes" do
lexer "This is string" `shouldBe` undefined
describe "Parser" do
it "Bold" do
"__bold__" `shouldParseTo` (DocBold (DocString "bold"))
it "Emphasis" do
"/emphasis/" `shouldParseTo` (DocEmphasis (DocString "emphasis"))
shouldParseTo :: Text -> DocMarkup mod Identifier -> Expectation
shouldParseTo input ast = parseText input `shouldBe` ast
type Doc id = DocMarkup () id
instance IsString (Doc String) where
fromString = DocString
file :: IO String
file = readFile "test/markup.md"

89
test/markup.md Normal file

@@ -0,0 +1,89 @@
/emphasized text/
__bold text__
@monospace text@
"Module.Name"
"Module.Name#anchor"
"Module.Name#anchor"
[http://example.com](http://example.com)
<[http://example.com](http://example.com) label text>
[http://example.com](http://example.com)
[https://example.com](https://example.com)
ftp\://example.com
[link text](http://example.com)
[link text]("Module.Name")
#anchor-name#
'identifier'
<<image.png>>
<<image.png title text>>
![alt text](image.png)
\(mathematical expression\)
\[mathematical expression\]
@
code block content
with multiple lines
@
> code line 1
> code line 2
>>> expression
result line 1
result line 2
>>> another expression
result
prop> property description
* item 1
* item 2
continued content
- item 1
- item 2
1. item 1
2. item 2
(1) item 1
(2) item 2
[term] definition content
[another term] more definition content
+----------+----------+
| Header 1 | Header 2 |
+==========+==========+
| Cell 1 | Cell 2 |
+----------+----------+
| Cell 3 | Cell 4 |
+----------+----------+
= Level 1 Header
== Level 2 Header
=== Level 3 Header
==== Level 4 Header
===== Level 5 Header
====== Level 6 Header
@since package-name-1.2.3
@since 1.2.3
&#65; (decimal)
&#x41; (hexadecimal)
&#X41; (hexadecimal)
This \@escapes\@ at signs