refactor lexer #2

Merged
primrose merged 13 commits from primrose/haddock2:leana8959 into dev 2025-09-26 14:44:47 +00:00
2 changed files with 10 additions and 11 deletions
Showing only changes of commit 326c7b681c - Show all commits

View file

@@ -12,7 +12,7 @@ bold ::= '__' text_no_newline '__'
monospace ::= '@' text_content '@' monospace ::= '@' text_content '@'
link ::= module_link | hyperlink | markdown_link link ::= module_link | hyperlink | markdown_link
module_link ::= '"' module_name ( '#' anchor_name )? '"' module_link ::= '"' module_name ( ('#' | '\#') anchor_name )? '"'
hyperlink ::= '<' url ( ' ' link_text )? '>' hyperlink ::= '<' url ( ' ' link_text )? '>'
markdown_link ::= '[' link_text '](' ( url | module_link ) ')' markdown_link ::= '[' link_text '](' ( url | module_link ) ')'

View file

@@ -94,7 +94,7 @@ lexText = go
, link , link
, labeledLink , labeledLink
, modules , modules
, anchors , anchor
, textElement , textElement
, quotes , quotes
, birdTrack , birdTrack
@@ -152,9 +152,6 @@ delimitedSymmetric s t1 t2 = delimited s s t1 t2
eol :: Parser () eol :: Parser ()
eol = void "\n" <|> void "\r\n" <|> Parsec.eof eol = void "\n" <|> void "\r\n" <|> Parsec.eof
anchorHash :: Parser Text
anchorHash = "#" <|> try "\\#"
header1 :: Lexer header1 :: Lexer
header1 = delimitedNoTrailing "= " eol (Header One) header1 = delimitedNoTrailing "= " eol (Header One)
@@ -174,11 +171,10 @@ header6 :: Lexer
header6 = delimitedNoTrailing "====== " eol (Header Six) header6 = delimitedNoTrailing "====== " eol (Header Six)
-- #anchors# -- #anchors#
anchors :: Lexer anchor :: Lexer
anchors = anchor = do
tokenise x <- located $ between "#" "#" (Anchor <$> anyUntil "#")
[ between anchorHash anchorHash (Anchor <$> anyUntil anchorHash) pure [x]
]
moduleNames :: Parser Text moduleNames :: Parser Text
@@ -192,7 +188,7 @@ identifierChar = satisfy (\c -> isAlphaNum c || c == '_')
-- "Module.Name" -- "Module.Name"
-- "Module.Name#anchor" -- "Module.Name#anchor"
-- "Module.Name\#anchor" -- this has been deprecated for 9 years, thanks Ben -- "Module.Name\#anchor" -- known as "old anchor". this has been deprecated for 9 years, thanks Ben
modules :: Lexer modules :: Lexer
modules = between (char '"') (char '"') inner modules = between (char '"') (char '"') inner
where where
@@ -203,6 +199,9 @@ modules = between (char '"') (char '"') inner
Just anchor -> [module_, anchor] Just anchor -> [module_, anchor]
Nothing -> [module_] Nothing -> [module_]
anchorHash :: Parser Text
anchorHash = "#" <|> try "\\#"
anchorText :: Parser Text anchorText :: Parser Text
anchorText = Text.pack <$> many (satisfy (\c -> c /= '"' && not (isSpace c))) anchorText = Text.pack <$> many (satisfy (\c -> c /= '"' && not (isSpace c)))