Compare commits

...

3 commits

SHA1 Message Date
4e49406286 Added cabal-gild to format cmd 2025-09-26 22:33:06 +02:00
1b59bb9c25 Formatting 2025-09-26 22:32:19 +02:00
72909cb84b Added Makefile 2025-09-26 22:31:28 +02:00
4 changed files with 285 additions and 274 deletions

Makefile (new file)

@@ -0,0 +1,9 @@
.PHONY: help
help: ## Show this help.
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
.PHONY: format
format:
find . -type f -name "*.hs" ! -path "./dist-newstyle/*" -exec fourmolu -i {} +
cabal-gild --io=haddock2.cabal

haddock2.cabal

@@ -46,10 +46,10 @@ test-suite haddock2-test
type: exitcode-stdio-1.0
main-is: Spec.hs
build-depends:
parsec ^>=3.1.18.0,
base >=4.20.1.0,
haddock2:{haddock2-lib},
hspec ^>=2.11.0,
parsec ^>=3.1.18.0,
text ^>=2.1.2,
hs-source-dirs: test

Lexer.hs

@@ -1,10 +1,11 @@
{-# LANGUAGE OverloadedStrings #-}
module Lexer (
Token (..),
lexer,
emphasis,
) where
Token (..),
lexer,
emphasis,
)
where
import Control.Monad (mfilter, void)
import Data.Functor (($>))
@@ -17,51 +18,52 @@ import Text.Parsec qualified as Parsec
import Text.Parsec.Pos (updatePosChar)
type Located a = (SourcePos, a)
type LocatedToken = (SourcePos, Token)
type Lexer = Parser [LocatedToken]
data Level
= One
| Two
| Three
| Four
| Five
| Six
deriving (Eq, Show)
= One
| Two
| Three
| Four
| Five
| Six
deriving (Eq, Show)
data Token
= Token Text
| Anchor Text
| BirdTrack
| BoldOpen
| BoldClose
| Escape
| EmphasisOpen
| EmphasisClose
| Header Level
| MonospaceOpen
| MonospaceClose
| Newline
| LinkOpen
| LinkClose
| LabeledLinkOpen
| LabeledLinkClose
| ParenOpen
| ParenClose
| BracketOpen
| BracketClose
| MathInlineOpen
| MathInlineClose
| MathMultilineOpen
| MathMultilineClose
| NumericEntity Int
| Module Text
| QuoteOpen
| QuoteClose
| Space
| EOF
deriving (Eq, Show)
= Token Text
| Anchor Text
| BirdTrack
| BoldOpen
| BoldClose
| Escape
| EmphasisOpen
| EmphasisClose
| Header Level
| MonospaceOpen
| MonospaceClose
| Newline
| LinkOpen
| LinkClose
| LabeledLinkOpen
| LabeledLinkClose
| ParenOpen
| ParenClose
| BracketOpen
| BracketClose
| MathInlineOpen
| MathInlineClose
| MathMultilineOpen
| MathMultilineClose
| NumericEntity Int
| Module Text
| QuoteOpen
| QuoteClose
| Space
| EOF
deriving (Eq, Show)
located :: Parser a -> Parser (SourcePos, a)
located p = (,) <$> getPosition <*> p
@@ -74,74 +76,74 @@ lexer = Parsec.runParser lexText initialParserState "input" . Text.pack
lexText :: Parser [LocatedToken]
lexText = go
where
go = do
Parsec.optionMaybe Parsec.eof >>= \case
Just _ -> pure []
Nothing -> do
toks <-
choice $
Parsec.try
<$> [ mathMultiline
, mathInline
, escape -- maths go before escape to avoid mismatch
, headers
, newlineToken
, spaceToken
, link
, labeledLink
, module_
, anchor
, textElement
, quotes
, birdTrack
, other
]
rest <- go
pure (toks <> rest)
where
go = do
Parsec.optionMaybe Parsec.eof >>= \case
Just _ -> pure []
Nothing -> do
toks <-
choice $
Parsec.try
<$> [ mathMultiline
, mathInline
, escape -- maths go before escape to avoid mismatch
, headers
, newlineToken
, spaceToken
, link
, labeledLink
, module_
, anchor
, textElement
, quotes
, birdTrack
, other
]
rest <- go
pure (toks <> rest)
-- Tokens
textElement :: Parser [LocatedToken]
textElement =
choice $
Parsec.try
<$> [ emphasis
, bold
, monospace
]
choice $
Parsec.try
<$> [ emphasis
, bold
, monospace
]
headers :: Parser [LocatedToken]
headers =
choice $
Parsec.try
<$> [ header1
, header2
, header3
, header4
, header5
, header6
]
choice $
Parsec.try
<$> [ header1
, header2
, header3
, header4
, header5
, header6
]
anyUntil :: Parser a -> Parser Text
anyUntil p = Text.pack <$> manyTill anyChar (lookAhead p)
delimitedAsTuple :: Parser open -> Parser close -> Parser (Located open, LocatedToken, Located close)
delimitedAsTuple openP closeP =
(,,)
<$> located openP
<*> located (Token <$> anyUntil closeP)
<*> located closeP
(,,)
<$> located openP
<*> located (Token <$> anyUntil closeP)
<*> located closeP
delimited :: Parser open -> Parser close -> Token -> Token -> Parser [LocatedToken]
delimited openP closeP openTok closeTok = asList <$> delimitedAsTuple (openTok <$ openP) (closeTok <$ closeP)
where
asList (a, tok, b) = [a, tok, b]
where
asList (a, tok, b) = [a, tok, b]
delimitedNoTrailing :: Parser open -> Parser close -> Token -> Parser [LocatedToken]
delimitedNoTrailing openP closeP openTok = asList <$> delimitedAsTuple (openTok <$ openP) (void closeP)
where
asList (a, tok, _) = [a, tok]
where
asList (a, tok, _) = [a, tok]
delimitedSymmetric :: Parser a -> Token -> Token -> Parser [LocatedToken]
delimitedSymmetric s t1 t2 = delimited s s t1 t2
@@ -170,9 +172,8 @@ header6 = delimitedNoTrailing "====== " eol (Header Six)
-- #anchors#
anchor :: Lexer
anchor = do
x <- located $ between "#" "#" (Anchor <$> anyUntil "#")
pure [x]
x <- located $ between "#" "#" (Anchor <$> anyUntil "#")
pure [x]
moduleNames :: Parser Text
moduleNames = intercalate "." . fmap Text.pack <$> upperId `sepBy1` char '.'
@@ -188,47 +189,47 @@ identifierChar = satisfy (\c -> isAlphaNum c || c == '_')
-- "Module.Name\#anchor" -- known as "old anchor". this has been deprecated for 9 years, thanks Ben
module_ :: Lexer
module_ = between (char '"') (char '"') inner
where
inner = do
m <- located $ Module <$> moduleNames
mAnchor <- optionMaybe (located $ anchorHash *> (Anchor <$> anchorText))
pure $ case mAnchor of
Just anc -> [m, anc]
Nothing -> [m]
where
inner = do
m <- located $ Module <$> moduleNames
mAnchor <- optionMaybe (located $ anchorHash *> (Anchor <$> anchorText))
pure $ case mAnchor of
Just anc -> [m, anc]
Nothing -> [m]
anchorHash :: Parser Text
anchorHash = "#" <|> try "\\#"
anchorHash :: Parser Text
anchorHash = "#" <|> try "\\#"
anchorText :: Parser Text
anchorText = Text.pack <$> many (satisfy (\c -> c /= '"' && not (isSpace c)))
anchorText :: Parser Text
anchorText = Text.pack <$> many (satisfy (\c -> c /= '"' && not (isSpace c)))
linkRaw :: Lexer
linkRaw =
tokenise
[ BracketOpen <$ "["
, Token <$> anyUntil "]"
, BracketClose <$ "]"
, ParenOpen <$ "("
, Token <$> anyUntil ")"
, ParenClose <$ ")"
]
tokenise
[ BracketOpen <$ "["
, Token <$> anyUntil "]"
, BracketClose <$ "]"
, ParenOpen <$ "("
, Token <$> anyUntil ")"
, ParenClose <$ ")"
]
link :: Lexer
link = do
pos <- getPosition
l <- linkRaw
-- register the position of the last token
pos' <- flip incSourceColumn (-1) <$> getPosition
pure $ (pos, LinkOpen) : l <> [(pos', LinkClose)]
pos <- getPosition
l <- linkRaw
-- register the position of the last token
pos' <- flip incSourceColumn (-1) <$> getPosition
pure $ (pos, LinkOpen) : l <> [(pos', LinkClose)]
labeledLink :: Lexer
labeledLink = do
open <- located $ LabeledLinkOpen <$ "<"
linkRes <- linkRaw
labelRes <- located $ Token <$> anyUntil ">"
close <- located $ LabeledLinkClose <$ ">"
pure $
open : linkRes <> [ labelRes , close ]
open <- located $ LabeledLinkOpen <$ "<"
linkRes <- linkRaw
labelRes <- located $ Token <$> anyUntil ">"
close <- located $ LabeledLinkClose <$ ">"
pure $
open : linkRes <> [labelRes, close]
mathMultiline :: Lexer
mathMultiline = delimited "\\[" "\\]" MathMultilineOpen MathMultilineClose
@@ -256,23 +257,23 @@ monospace = delimitedSymmetric "@" MonospaceOpen MonospaceClose
other :: Lexer
other = do
pos <- getPosition
c <- takeWhile1_ isUnicodeAlphaNum
pure . pure $ (pos, Token c)
where
isUnicodeAlphaNum c = isPrint c && not (isControl c) && not (isSpace c)
pos <- getPosition
c <- takeWhile1_ isUnicodeAlphaNum
pure . pure $ (pos, Token c)
where
isUnicodeAlphaNum c = isPrint c && not (isControl c) && not (isSpace c)
spaceToken :: Lexer
spaceToken = do
pos <- getPosition
_ <- many1 (char ' ')
pure . pure $ (pos, Space)
pos <- getPosition
_ <- many1 (char ' ')
pure . pure $ (pos, Space)
newlineToken :: Lexer
newlineToken = do
pos <- getPosition
_ <- newline
pure . pure $ (pos, Newline)
pos <- getPosition
_ <- newline
pure . pure $ (pos, Newline)
-------
-- Helpers
@@ -281,11 +282,11 @@ newlineToken = do
-- | Like `takeWhile`, but unconditionally take escaped characters.
takeWhile_ :: (Char -> Bool) -> Parser Text
takeWhile_ p = scan p_ False
where
p_ escaped c
| escaped = Just False
| not $ p c = Nothing
| otherwise = Just (c == '\\')
where
p_ escaped c
| escaped = Just False
| not $ p c = Nothing
| otherwise = Just (c == '\\')
-- | Like 'takeWhile1', but unconditionally take escaped characters.
takeWhile1_ :: (Char -> Bool) -> Parser Text
@@ -295,19 +296,20 @@ takeWhile1_ = mfilter (not . Text.null) . takeWhile_
function returns true.
-}
scan ::
-- | scan function
(state -> Char -> Maybe state) ->
-- | initial state
state ->
Parser Text
-- | scan function
(state -> Char -> Maybe state) ->
-- | initial state
state ->
Parser Text
scan f initState = do
parserState@State{stateInput = input, statePos = pos} <- Parsec.getParserState
(remaining, finalPos, ct) <- go input initState pos 0
let newState = parserState{stateInput = remaining, statePos = finalPos}
Parsec.setParserState newState $> Text.take ct input
where
go !input' !st !posAccum !count' = case Text.uncons input' of
Nothing -> pure (input', posAccum, count')
Just (char', input'') -> case f st char' of
Nothing -> pure (input', posAccum, count')
Just st' -> go input'' st' (updatePosChar posAccum char') (count' + 1)
parserState@State{stateInput = input, statePos = pos} <- Parsec.getParserState
(remaining, finalPos, ct) <- go input initState pos 0
let newState = parserState{stateInput = remaining, statePos = finalPos}
Parsec.setParserState newState $> Text.take ct input
where
go !input' !st !posAccum !count' = case Text.uncons input' of
Nothing -> pure (input', posAccum, count')
Just (char', input'') -> case f st char' of
Nothing -> pure (input', posAccum, count')
Just st' -> go input'' st' (updatePosChar posAccum char') (count' + 1)
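
For orientation before the test file: `lexer` takes the raw comment text as a `String` and yields either a Parsec error or a list of tokens annotated with their source positions. The sketch below shows one way to call it, assuming a standalone `Main` module purely for illustration; the expected output is taken directly from the `emphatic` expectation in the test suite that follows.

module Main (main) where

import Lexer (Token (..), lexer)
import Text.Parsec.Pos (sourceColumn, sourceLine)

-- Expected to print, per the test suite:
--   (1,1)  EmphasisOpen
--   (1,2)  Token "emphatic"
--   (1,10) EmphasisClose
main :: IO ()
main =
  case lexer "/emphatic/" of
    Left err -> print err
    Right toks ->
      mapM_
        (\(pos, tok) -> putStrLn (show (sourceLine pos, sourceColumn pos) <> " " <> show tok))
        toks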

test/Spec.hs

@@ -10,33 +10,33 @@ import Types
import Data.String (IsString (..))
import Data.Text (Text)
import Text.Parsec.Pos
import GHC.Stack
import Text.Parsec.Pos
main :: IO ()
main = hspec $ do
describe "Lexer" do
describe "minimal" do
it "handles unicode" unicode
it "escapes" escaping
it "maths" math
it "anchors" anchor
it "space chars" space
it "bare string" someString
it "emphasis" emphatic
it "monospace" monospace
it "labeled link" labeledLink
it "markdown link" link
it "bird tracks" birdTracks
it "module names" modules
it "quotes" quotes
it "ignores nesting" ignoreNesting
describe "Lexer" do
describe "minimal" do
it "handles unicode" unicode
it "escapes" escaping
it "maths" math
it "anchors" anchor
it "space chars" space
it "bare string" someString
it "emphasis" emphatic
it "monospace" monospace
it "labeled link" labeledLink
it "markdown link" link
it "bird tracks" birdTracks
it "module names" modules
it "quotes" quotes
it "ignores nesting" ignoreNesting
describe "Parser" do
it "Bold" do
"__bold__" `shouldParseTo` (DocBold (DocString "bold"))
it "Emphasis" do
"/emphasis/" `shouldParseTo` (DocEmphasis (DocString "emphasis"))
describe "Parser" do
it "Bold" do
"__bold__" `shouldParseTo` (DocBold (DocString "bold"))
it "Emphasis" do
"/emphasis/" `shouldParseTo` (DocEmphasis (DocString "emphasis"))
------------
-- Tests
@@ -44,137 +44,137 @@ main = hspec $ do
modules :: Expectation
modules = do
"\"MyModule.Name\""
`shouldLexTo` [ (1, 2, Module "MyModule.Name")
]
"\"MyModule.Name\""
`shouldLexTo` [ (1, 2, Module "MyModule.Name")
]
"\"OtherModule.Name#myAnchor\""
`shouldLexTo` [ (1, 2, Module "OtherModule.Name")
, (1, 18, Anchor "myAnchor")
]
"\"OtherModule.Name#myAnchor\""
`shouldLexTo` [ (1, 2, Module "OtherModule.Name")
, (1, 18, Anchor "myAnchor")
]
"\"OtherModule.Name\\#myAnchor\""
`shouldLexTo` [ (1, 2, Module "OtherModule.Name")
, (1, 18, Anchor "myAnchor")
]
"\"OtherModule.Name\\#myAnchor\""
`shouldLexTo` [ (1, 2, Module "OtherModule.Name")
, (1, 18, Anchor "myAnchor")
]
link :: Expectation
link =
"[link to](http://some.website)"
`shouldLexTo` [ (1, 1, LinkOpen)
, (1, 1, BracketOpen)
, (1, 2, Token "link to")
, (1, 9, BracketClose)
, (1, 10, ParenOpen)
, (1, 11, Token "http://some.website")
, (1, 30, ParenClose)
, (1, 30, LinkClose)
]
"[link to](http://some.website)"
`shouldLexTo` [ (1, 1, LinkOpen)
, (1, 1, BracketOpen)
, (1, 2, Token "link to")
, (1, 9, BracketClose)
, (1, 10, ParenOpen)
, (1, 11, Token "http://some.website")
, (1, 30, ParenClose)
, (1, 30, LinkClose)
]
labeledLink :: Expectation
labeledLink =
"<[link here](http://to.here) label>"
`shouldLexTo` [ (1, 1, LabeledLinkOpen)
, (1, 2, BracketOpen)
, (1, 3, Token "link here")
, (1, 12, BracketClose)
, (1, 13, ParenOpen)
, (1, 14, Token "http://to.here")
, (1, 28, ParenClose)
, (1, 29, Token " label")
, (1, 35, LabeledLinkClose)
]
"<[link here](http://to.here) label>"
`shouldLexTo` [ (1, 1, LabeledLinkOpen)
, (1, 2, BracketOpen)
, (1, 3, Token "link here")
, (1, 12, BracketClose)
, (1, 13, ParenOpen)
, (1, 14, Token "http://to.here")
, (1, 28, ParenClose)
, (1, 29, Token " label")
, (1, 35, LabeledLinkClose)
]
anchor :: Expectation
anchor =
"#myAnchor#"
`shouldLexTo` [ (1, 1, Anchor "myAnchor")
]
"#myAnchor#"
`shouldLexTo` [ (1, 1, Anchor "myAnchor")
]
math :: IO ()
math = do
"\\[some math\\]"
`shouldLexTo` [ (1, 1, MathMultilineOpen)
, (1, 3, Token "some math")
, (1, 12, MathMultilineClose)
]
"\\(other maths\\)"
`shouldLexTo` [ (1, 1, MathInlineOpen)
, (1, 3, Token "other maths")
, (1, 14, MathInlineClose)
]
"\\[some math\\]"
`shouldLexTo` [ (1, 1, MathMultilineOpen)
, (1, 3, Token "some math")
, (1, 12, MathMultilineClose)
]
"\\(other maths\\)"
`shouldLexTo` [ (1, 1, MathInlineOpen)
, (1, 3, Token "other maths")
, (1, 14, MathInlineClose)
]
escaping :: Expectation
escaping = do
"\\("
`shouldLexTo` [ (1, 1, Escape)
, (1, 2, Token "(")
]
"\\(\r\n"
`shouldLexTo` [ (1, 1, Escape)
, (1, 2, Token "(")
]
"\\("
`shouldLexTo` [ (1, 1, Escape)
, (1, 2, Token "(")
]
"\\(\r\n"
`shouldLexTo` [ (1, 1, Escape)
, (1, 2, Token "(")
]
unicode :: Expectation
unicode =
"ドラゴンクエストの冒険者🐉"
`shouldLexTo` [ (1, 1, Token "ドラゴンクエストの冒険者🐉")
]
"ドラゴンクエストの冒険者🐉"
`shouldLexTo` [ (1, 1, Token "ドラゴンクエストの冒険者🐉")
]
ignoreNesting :: Expectation
ignoreNesting =
">/foo/"
`shouldLexTo` [ (1, 1, Token ">/foo/")
]
">/foo/"
`shouldLexTo` [ (1, 1, Token ">/foo/")
]
birdTracks :: Expectation
birdTracks =
">> code"
`shouldLexTo` [ (1, 1, BirdTrack)
, (1, 4, Token "code")
]
">> code"
`shouldLexTo` [ (1, 1, BirdTrack)
, (1, 4, Token "code")
]
quotes :: Expectation
quotes =
"\"quoted\""
`shouldLexTo` [ (1, 1, QuoteOpen)
, (1, 2, Token "quoted")
, (1, 8, QuoteClose)
]
"\"quoted\""
`shouldLexTo` [ (1, 1, QuoteOpen)
, (1, 2, Token "quoted")
, (1, 8, QuoteClose)
]
space :: Expectation
space = do
"\n "
`shouldLexTo` [ (1, 1, Newline)
, (2, 1, Space)
]
" \n"
`shouldLexTo` [ (1, 1, Space)
, (1, 2, Newline)
]
"\n "
`shouldLexTo` [ (1, 1, Newline)
, (2, 1, Space)
]
" \n"
`shouldLexTo` [ (1, 1, Space)
, (1, 2, Newline)
]
monospace :: Expectation
monospace =
"@mono@"
`shouldLexTo` [ (1, 1, MonospaceOpen)
, (1, 2, Token "mono")
, (1, 6, MonospaceClose)
]
"@mono@"
`shouldLexTo` [ (1, 1, MonospaceOpen)
, (1, 2, Token "mono")
, (1, 6, MonospaceClose)
]
emphatic :: Expectation
emphatic =
"/emphatic/"
`shouldLexTo` [ (1, 1, EmphasisOpen)
, (1, 2, Token "emphatic")
, (1, 10, EmphasisClose)
]
"/emphatic/"
`shouldLexTo` [ (1, 1, EmphasisOpen)
, (1, 2, Token "emphatic")
, (1, 10, EmphasisClose)
]
someString :: Expectation
someString =
"some string"
`shouldLexTo` [ (1, 1, Token "some")
, (1, 5, Space)
, (1, 6, Token "string")
]
"some string"
`shouldLexTo` [ (1, 1, Token "some")
, (1, 5, Space)
, (1, 6, Token "string")
]
--------------
-- Helpers
@@ -183,16 +183,16 @@ someString =
type Doc id = DocMarkup () id
instance IsString (Doc String) where
fromString = DocString
fromString = DocString
shouldLexTo :: String -> [(Int, Int, Token)] -> Expectation
shouldLexTo input expected =
withFrozenCallStack $
withFrozenCallStack $
case lexer input of
Right tokens -> do
let actual = map (\(pos, tok) -> (sourceLine pos, sourceColumn pos, tok)) tokens
actual `shouldBe` expected
Left err -> expectationFailure $ "Parse error: " <> show err
Right tokens -> do
let actual = map (\(pos, tok) -> (sourceLine pos, sourceColumn pos, tok)) tokens
actual `shouldBe` expected
Left err -> expectationFailure $ "Parse error: " <> show err
shouldParseTo :: Text -> DocMarkup mod Identifier -> Expectation
shouldParseTo input ast = parseText input `shouldBe` ast
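
New lexer-level cases follow the same pattern as the expectations above. As a sketch, a bold case might look like this; the exact columns (notably column 7 for `BoldClose`, the start of the closing `__`) are inferred from the `emphatic` and `monospace` expectations and are not verified in this diff.

-- Hypothetical extra case, modelled on `emphatic` above; the expected
-- positions (in particular column 7 for BoldClose) are inferred, not verified.
bold :: Expectation
bold =
  "__bold__"
    `shouldLexTo` [ (1, 1, BoldOpen)
                  , (1, 3, Token "bold")
                  , (1, 7, BoldClose)
                  ]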