From 326c7b681cd602dbe725a111efc0254f3700b44a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9ana=20=E6=B1=9F?= Date: Wed, 24 Sep 2025 22:19:37 +0800 Subject: [PATCH 1/4] fix(lexer): old anchor is only used in moduleName --- Grammar.ebnf | 2 +- src/Lexer.hs | 19 +++++++++---------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/Grammar.ebnf b/Grammar.ebnf index bff331d..2404a67 100644 --- a/Grammar.ebnf +++ b/Grammar.ebnf @@ -12,7 +12,7 @@ bold ::= '__' text_no_newline '__' monospace ::= '@' text_content '@' link ::= module_link | hyperlink | markdown_link -module_link ::= '"' module_name ( '#' anchor_name )? '"' +module_link ::= '"' module_name ( ('#' | '\#') anchor_name )? '"' hyperlink ::= '<' url ( ' ' link_text )? '>' markdown_link ::= '[' link_text '](' ( url | module_link ) ')' diff --git a/src/Lexer.hs b/src/Lexer.hs index 04fa84f..9b33f03 100644 --- a/src/Lexer.hs +++ b/src/Lexer.hs @@ -94,7 +94,7 @@ lexText = go , link , labeledLink , modules - , anchors + , anchor , textElement , quotes , birdTrack @@ -152,9 +152,6 @@ delimitedSymmetric s t1 t2 = delimited s s t1 t2 eol :: Parser () eol = void "\n" <|> void "\r\n" <|> Parsec.eof -anchorHash :: Parser Text -anchorHash = "#" <|> try "\\#" - header1 :: Lexer header1 = delimitedNoTrailing "= " eol (Header One) @@ -174,11 +171,10 @@ header6 :: Lexer header6 = delimitedNoTrailing "====== " eol (Header Six) -- #anchors# -anchors :: Lexer -anchors = - tokenise - [ between anchorHash anchorHash (Anchor <$> anyUntil anchorHash) - ] +anchor :: Lexer +anchor = do + x <- located $ between "#" "#" (Anchor <$> anyUntil "#") + pure [x] moduleNames :: Parser Text @@ -192,7 +188,7 @@ identifierChar = satisfy (\c -> isAlphaNum c || c == '_') -- "Module.Name" -- "Module.Name#anchor" --- "Module.Name\#anchor" -- this has been deprecated for 9 years, thanks Ben +-- "Module.Name\#anchor" -- known as "old anchor". this has been deprecated for 9 years, thanks Ben modules :: Lexer modules = between (char '"') (char '"') inner where @@ -203,6 +199,9 @@ modules = between (char '"') (char '"') inner Just anchor -> [module_, anchor] Nothing -> [module_] + anchorHash :: Parser Text + anchorHash = "#" <|> try "\\#" + anchorText :: Parser Text anchorText = Text.pack <$> many (satisfy (\c -> c /= '"' && not (isSpace c))) From 29c015b79310aa4ecd6a51f9b895ad5910bb2766 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9ana=20=E6=B1=9F?= Date: Wed, 24 Sep 2025 22:22:00 +0800 Subject: [PATCH 2/4] style(lexer): make binding naming consistent --- src/Lexer.hs | 12 ++++++------ test/Spec.hs | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/Lexer.hs b/src/Lexer.hs index 9b33f03..c44c0e4 100644 --- a/src/Lexer.hs +++ b/src/Lexer.hs @@ -93,7 +93,7 @@ lexText = go , spaceToken , link , labeledLink - , modules + , module_ , anchor , textElement , quotes @@ -189,15 +189,15 @@ identifierChar = satisfy (\c -> isAlphaNum c || c == '_') -- "Module.Name" -- "Module.Name#anchor" -- "Module.Name\#anchor" -- known as "old anchor". this has been deprecated for 9 years, thanks Ben -modules :: Lexer -modules = between (char '"') (char '"') inner +module_ :: Lexer +module_ = between (char '"') (char '"') inner where inner = do - module_ <- located $ Module <$> moduleNames + m <- located $ Module <$> moduleNames mAnchor <- optionMaybe (located $ anchorHash *> (Anchor <$> anchorText)) pure $ case mAnchor of - Just anchor -> [module_, anchor] - Nothing -> [module_] + Just anc -> [m, anc] + Nothing -> [m] anchorHash :: Parser Text anchorHash = "#" <|> try "\\#" diff --git a/test/Spec.hs b/test/Spec.hs index 7ddbcff..745aefa 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -19,8 +19,8 @@ main = hspec $ do describe "minimal" do it "handles unicode" unicode it "escapes" escaping - it "maths" maths - it "anchors" anchors + it "maths" math + it "anchors" anchor it "space chars" space it "bare string" someString it "emphasis" emphatic @@ -84,14 +84,14 @@ labeledLink = , (1, 35, LabeledLinkClose) ] -anchors :: Expectation -anchors = +anchor :: Expectation +anchor = "#myAnchor#" `shouldLexTo` [ (1, 1, Anchor "myAnchor") ] -maths :: IO () -maths = do +math :: IO () +math = do "\\[some math\\]" `shouldLexTo` [ (1, 1, MathMultilineOpen) , (1, 3, Token "some math") From 2597e693f13e6e0b0e3622810cc3194c442ffdba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9ana=20=E6=B1=9F?= Date: Wed, 24 Sep 2025 22:31:59 +0800 Subject: [PATCH 3/4] ref(lexer): simplify labeledLink --- src/Lexer.hs | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/src/Lexer.hs b/src/Lexer.hs index c44c0e4..7874863 100644 --- a/src/Lexer.hs +++ b/src/Lexer.hs @@ -226,20 +226,12 @@ link = do labeledLink :: Lexer labeledLink = do - pos <- getPosition - void $ string "<" - link' <- linkRaw - pos7 <- getPosition - label' <- anyUntil $ string ">" - pos8 <- getPosition - void $ ">" - + open <- located $ LabeledLinkOpen <$ "<" + linkRes <- linkRaw + labelRes <- located $ Token <$> anyUntil ">" + close <- located $ LabeledLinkClose <$ ">" pure $ - (pos, LabeledLinkOpen) - : link' - <> [ (pos7, Token label') - , (pos8, LabeledLinkClose) - ] + open : linkRes <> [ labelRes , close ] mathMultiline :: Lexer mathMultiline = delimited "\\[" "\\]" MathMultilineOpen MathMultilineClose From 970b658926e4a683f6c557539ddf750803cabd44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9ana=20=E6=B1=9F?= Date: Wed, 24 Sep 2025 22:32:35 +0800 Subject: [PATCH 4/4] chore(lexer): clean up --- src/Lexer.hs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/Lexer.hs b/src/Lexer.hs index 7874863..426a7ff 100644 --- a/src/Lexer.hs +++ b/src/Lexer.hs @@ -10,7 +10,7 @@ import Control.Monad (mfilter, void) import Data.Functor (($>)) import Data.Text (Text, intercalate) import Data.Text qualified as Text -import GHC.Unicode (isAlpha, isAlphaNum, isControl, isPrint, isSpace, isUpper) +import GHC.Unicode (isAlphaNum, isControl, isPrint, isSpace, isUpper) import ParserMonad (Parser, initialParserState) import Text.Parsec import Text.Parsec qualified as Parsec @@ -66,9 +66,6 @@ data Token located :: Parser a -> Parser (SourcePos, a) located p = (,) <$> getPosition <*> p -startPosition :: Parser a -> Parser SourcePos -startPosition = fmap fst . located - tokenise :: [Parser a] -> Parser [(SourcePos, a)] tokenise = sequence . map located