From fdb9070e99045c0e44b07df2d1a591fa5396d13c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9ana=20=E6=B1=9F?= Date: Wed, 24 Sep 2025 18:35:15 +0800 Subject: [PATCH 01/13] fix(lexer): handle crlf in newline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Do we support windows 🤔 --- src/Lexer.hs | 2 +- test/Spec.hs | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/Lexer.hs b/src/Lexer.hs index e12324b..87f0ee0 100644 --- a/src/Lexer.hs +++ b/src/Lexer.hs @@ -152,7 +152,7 @@ delimitedSymmetric :: Parser a -> Token -> Token -> Parser [LocatedToken] delimitedSymmetric s t1 t2 = delimited s s t1 t2 eol :: Parser () -eol = void "\n" <|> Parsec.eof +eol = void "\n" <|> void "\r\n" <|> Parsec.eof header1 :: Lexer header1 = delimitedMaybe (void $ "= ") eol (Header One) Nothing diff --git a/test/Spec.hs b/test/Spec.hs index 0a7653d..fb9d8b2 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -107,11 +107,15 @@ maths = do ] escaping :: Expectation -escaping = +escaping = do "\\(" `shouldLexTo` [ (1, 1, Escape) , (1, 2, Token "(") ] + "\\(\r\n" + `shouldLexTo` [ (1, 1, Escape) + , (1, 2, Token "(") + ] unicode :: Expectation unicode = -- 2.49.1 From d6087ec3d606f3d1937149ff1cce769ea4d775c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9ana=20=E6=B1=9F?= Date: Wed, 24 Sep 2025 20:15:41 +0800 Subject: [PATCH 02/13] ref(lexer): simplify delimited logic --- src/Lexer.hs | 56 +++++++++++++++++++++++++--------------------------- 1 file changed, 27 insertions(+), 29 deletions(-) diff --git a/src/Lexer.hs b/src/Lexer.hs index 87f0ee0..31f3ac0 100644 --- a/src/Lexer.hs +++ b/src/Lexer.hs @@ -16,6 +16,7 @@ import Text.Parsec import Text.Parsec qualified as Parsec import Text.Parsec.Pos (updatePosChar) +type Located a = (SourcePos, a) type LocatedToken = (SourcePos, Token) type Lexer = Parser [LocatedToken] @@ -125,28 +126,25 @@ headers = , header6 ] -delimitedMaybe :: Parser a -> Parser a -> Token -> Maybe Token -> 
Parser [LocatedToken] -delimitedMaybe openMark closeMark openToken closeToken = do - openPos <- getPosition - void openMark - tokenPos <- getPosition - content <- anyUntil closeMark - closePos <- getPosition - void closeMark - - let openTok :: LocatedToken = (openPos, openToken) - res :: LocatedToken = (tokenPos, Token content) - closeToks :: [LocatedToken] = case closeToken of - Just close -> [(closePos, close)] - Nothing -> [] - - pure $ [openTok, res] <> closeToks - anyUntil :: Parser a -> Parser Text anyUntil p = Text.pack <$> manyTill anyChar (lookAhead p) -delimited :: Parser a -> Parser a -> Token -> Token -> Parser [LocatedToken] -delimited a b c d = delimitedMaybe a b c (Just d) +delimitedAsTuple :: Parser open -> Parser close -> Parser (Located open, LocatedToken, Located close) +delimitedAsTuple openP closeP = + (,,) + <$> located openP + <*> located (Token <$> anyUntil closeP) + <*> located closeP + +delimited :: Parser a -> Parser b -> Token -> Token -> Parser [LocatedToken] +delimited openP closeP openTok closeTok = asList <$> delimitedAsTuple (openTok <$ openP) (closeTok <$ closeP) + where + asList (a, tok, b) = [a, tok, b] + +delimitedNoTrailing :: Parser a -> Parser b -> Token -> Parser [LocatedToken] +delimitedNoTrailing openP closeP openTok = asList <$> delimitedAsTuple (openTok <$ openP) (void closeP) + where + asList (a, tok, _) = [a, tok] delimitedSymmetric :: Parser a -> Token -> Token -> Parser [LocatedToken] delimitedSymmetric s t1 t2 = delimited s s t1 t2 @@ -155,22 +153,22 @@ eol :: Parser () eol = void "\n" <|> void "\r\n" <|> Parsec.eof header1 :: Lexer -header1 = delimitedMaybe (void $ "= ") eol (Header One) Nothing +header1 = delimitedNoTrailing "= " eol (Header One) header2 :: Lexer -header2 = delimitedMaybe (void $ "== ") eol (Header Two) Nothing +header2 = delimitedNoTrailing "== " eol (Header Two) header3 :: Lexer -header3 = delimitedMaybe (void $ "=== ") eol (Header Three) Nothing +header3 = delimitedNoTrailing "=== " eol (Header 
Three) header4 :: Lexer -header4 = delimitedMaybe (void $ "==== ") eol (Header Four) Nothing +header4 = delimitedNoTrailing "==== " eol (Header Four) header5 :: Lexer -header5 = delimitedMaybe (void $ "===== ") eol (Header Five) Nothing +header5 = delimitedNoTrailing "===== " eol (Header Five) header6 :: Lexer -header6 = delimitedMaybe (void $ "====== ") eol (Header Six) Nothing +header6 = delimitedNoTrailing "====== " eol (Header Six) -- #anchors# anchors :: Lexer @@ -247,16 +245,16 @@ labeledLink = do ] mathsBracket :: Lexer -mathsBracket = delimited (void $ "\\[") (void "\\]") MathsBracketOpen MathsBracketClose +mathsBracket = delimited "\\[" "\\]" MathsBracketOpen MathsBracketClose mathsParens :: Lexer -mathsParens = delimited (void $ "\\(") (void "\\)") MathsParenOpen MathsParenClose +mathsParens = delimited "\\(" "\\)" MathsParenOpen MathsParenClose birdTrack :: Lexer -birdTrack = delimitedMaybe (void ">> ") eol BirdTrack Nothing +birdTrack = delimitedNoTrailing ">> " eol BirdTrack escape :: Lexer -escape = delimitedMaybe (void "\\") eol Escape Nothing +escape = delimitedNoTrailing "\\" eol Escape quotes :: Lexer quotes = delimitedSymmetric "\"" QuoteOpen QuoteClose -- 2.49.1 From 368e5bc9a0eb923ee2c79d59d7b6cafff3093cc1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9ana=20=E6=B1=9F?= Date: Wed, 24 Sep 2025 20:25:48 +0800 Subject: [PATCH 03/13] ref(lexer): simplify anchor --- src/Lexer.hs | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/src/Lexer.hs b/src/Lexer.hs index 31f3ac0..d89ad53 100644 --- a/src/Lexer.hs +++ b/src/Lexer.hs @@ -152,6 +152,9 @@ delimitedSymmetric s t1 t2 = delimited s s t1 t2 eol :: Parser () eol = void "\n" <|> void "\r\n" <|> Parsec.eof +anchorHash :: Parser Text +anchorHash = "#" <|> try "\\#" + header1 :: Lexer header1 = delimitedNoTrailing "= " eol (Header One) @@ -172,15 +175,10 @@ header6 = delimitedNoTrailing "====== " eol (Header Six) -- #anchors# anchors :: Lexer -anchors = do - pos <- 
getPosition - void $ try anchor' - txt <- anyUntil anchor' - void $ try anchor' - - pure [(pos, Anchor txt)] - where - anchor' = (string "#" <|> string "\\#") +anchors = + tokenise + [ between anchorHash anchorHash (Anchor <$> anyUntil anchorHash) + ] -- "Module.Name" -- "Module.Name#anchor" -- 2.49.1 From 7ceb9b0277f440565436f84a076cd57582dc59e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9ana=20=E6=B1=9F?= Date: Wed, 24 Sep 2025 20:47:55 +0800 Subject: [PATCH 04/13] ref(moduleName): break into multiple smaller functions upperId has been changed to use only isUpper, because isUpper is already False for any non-alphabetic character --- src/Lexer.hs | 40 ++++++++++++++++++++-------------------- test/Spec.hs | 9 +++------ 2 files changed, 23 insertions(+), 26 deletions(-) diff --git a/src/Lexer.hs b/src/Lexer.hs index d89ad53..89bafc5 100644 --- a/src/Lexer.hs +++ b/src/Lexer.hs @@ -56,7 +56,7 @@ data Token | MathsBracketOpen | MathsBracketClose | NumericEntity Int - | Module + | Module Text | QuoteOpen | QuoteClose | Space @@ -180,31 +180,31 @@ anchors = [ between anchorHash anchorHash (Anchor <$> anyUntil anchorHash) ] + +moduleName :: Parser Text +moduleName = intercalate "." . fmap Text.pack <$> upperId `sepBy1` char '.' + +upperId :: Parser String +upperId = (:) <$> satisfy isUpper <*> many1 identifierChar + +identifierChar :: Parser Char +identifierChar = satisfy (\c -> isAlphaNum c || c == '_') + -- "Module.Name" -- "Module.Name#anchor" -- "Module.Name\#anchor" -- this has been deprecated for 9 years, thanks Ben modules :: Lexer -modules = do - startPos <- startPosition $ char '"' - (modPos, modName) <- located modId - anch <- option [] do - anchPos <- startPosition (string "#" <|> string' "\\#") - txt <- Text.pack <$> many (satisfy (\c -> c /= '"' && not (isSpace c))) - pure [(anchPos, Anchor txt)] - - void $ char '"' - pure $ [(startPos, Module), (modPos, Token modName)] <> anch +modules = between (char '"') (char '"') inner where - modId = intercalate "." 
<$> (fmap Text.pack <$> (conId `sepBy1` (char '.'))) + inner = do + module_ <- located $ Module <$> moduleName + mAnchor <- optionMaybe (located $ anchorHash *> (Anchor <$> anchorText)) + pure $ case mAnchor of + Just anchor -> [module_, anchor] + Nothing -> [module_] - conId :: Parser String - conId = - (:) - <$> satisfy (\c -> isAlpha c && isUpper c) - <*> many1 conChar - - conChar :: Parser Char - conChar = satisfy (\c -> isAlphaNum c || c == '_') + anchorText :: Parser Text + anchorText = Text.pack <$> many (satisfy (\c -> c /= '"' && not (isSpace c))) linkRaw :: Lexer linkRaw = diff --git a/test/Spec.hs b/test/Spec.hs index fb9d8b2..a09489e 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -45,19 +45,16 @@ main = hspec $ do modules :: Expectation modules = do "\"MyModule.Name\"" - `shouldLexTo` [ (1, 1, Module) - , (1, 2, Token "MyModule.Name") + `shouldLexTo` [ (1, 2, Module "MyModule.Name") ] "\"OtherModule.Name#myAnchor\"" - `shouldLexTo` [ (1, 1, Module) - , (1, 2, Token "OtherModule.Name") + `shouldLexTo` [ (1, 2, Module "OtherModule.Name") , (1, 18, Anchor "myAnchor") ] "\"OtherModule.Name\\#myAnchor\"" - `shouldLexTo` [ (1, 1, Module) - , (1, 2, Token "OtherModule.Name") + `shouldLexTo` [ (1, 2, Module "OtherModule.Name") , (1, 18, Anchor "myAnchor") ] link :: Expectation -- 2.49.1 From 75c48171669cc973c14e5252d1bbb9a1c117ee58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9ana=20=E6=B1=9F?= Date: Wed, 24 Sep 2025 20:51:05 +0800 Subject: [PATCH 05/13] style(lexer): pluralize moduleNames parser --- src/Lexer.hs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Lexer.hs b/src/Lexer.hs index 89bafc5..3e98cce 100644 --- a/src/Lexer.hs +++ b/src/Lexer.hs @@ -181,8 +181,8 @@ anchors = ] -moduleName :: Parser Text -moduleName = intercalate "." . fmap Text.pack <$> upperId `sepBy1` char '.' +moduleNames :: Parser Text +moduleNames = intercalate "." . fmap Text.pack <$> upperId `sepBy1` char '.' 
upperId :: Parser String upperId = (:) <$> satisfy isUpper <*> many1 identifierChar @@ -197,7 +197,7 @@ modules :: Lexer modules = between (char '"') (char '"') inner where inner = do - module_ <- located $ Module <$> moduleName + module_ <- located $ Module <$> moduleNames mAnchor <- optionMaybe (located $ anchorHash *> (Anchor <$> anchorText)) pure $ case mAnchor of Just anchor -> [module_, anchor] -- 2.49.1 From 6c0b4a4288242d72820805e4acd3c122f5d1fe2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9ana=20=E6=B1=9F?= Date: Wed, 24 Sep 2025 21:21:59 +0800 Subject: [PATCH 06/13] doc(lexer): explain the use of incSourceColumn I think it is clearer to phrase it this way so it is clear that we are not unconsuming (i.e. changing the state of the parser). --- src/Lexer.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Lexer.hs b/src/Lexer.hs index 3e98cce..273a0fe 100644 --- a/src/Lexer.hs +++ b/src/Lexer.hs @@ -221,7 +221,7 @@ link :: Lexer link = do pos <- getPosition l <- linkRaw - -- "unconsume" the last token + -- register the position of the last token pos' <- flip incSourceColumn (-1) <$> getPosition pure $ (pos, LinkOpen) : l <> [(pos', LinkClose)] -- 2.49.1 From f3b3b08919662479df126597446738e0fb92ef6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9ana=20=E6=B1=9F?= Date: Wed, 24 Sep 2025 21:28:17 +0800 Subject: [PATCH 07/13] style(lexer): use "open" "close" in the type --- src/Lexer.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Lexer.hs b/src/Lexer.hs index 273a0fe..436ebfe 100644 --- a/src/Lexer.hs +++ b/src/Lexer.hs @@ -136,12 +136,12 @@ delimitedAsTuple openP closeP = <*> located (Token <$> anyUntil closeP) <*> located closeP -delimited :: Parser a -> Parser b -> Token -> Token -> Parser [LocatedToken] +delimited :: Parser open -> Parser close -> Token -> Token -> Parser [LocatedToken] delimited openP closeP openTok closeTok = asList <$> delimitedAsTuple (openTok <$ openP) (closeTok <$ closeP) where 
asList (a, tok, b) = [a, tok, b] -delimitedNoTrailing :: Parser a -> Parser b -> Token -> Parser [LocatedToken] +delimitedNoTrailing :: Parser open -> Parser close -> Token -> Parser [LocatedToken] delimitedNoTrailing openP closeP openTok = asList <$> delimitedAsTuple (openTok <$ openP) (void closeP) where asList (a, tok, _) = [a, tok] -- 2.49.1 From 6ec47dad043d4dd7f59d5ff03a54b4dae8ad1c7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9ana=20=E6=B1=9F?= Date: Wed, 24 Sep 2025 21:31:26 +0800 Subject: [PATCH 08/13] ref(lexer): rename MathParen -> MathInline --- src/Lexer.hs | 10 +++++----- test/Spec.hs | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/Lexer.hs b/src/Lexer.hs index 436ebfe..c2a7f01 100644 --- a/src/Lexer.hs +++ b/src/Lexer.hs @@ -51,8 +51,8 @@ data Token | ParenClose | BracketOpen | BracketClose - | MathsParenOpen - | MathsParenClose + | MathInlineOpen + | MathInlineClose | MathsBracketOpen | MathsBracketClose | NumericEntity Int @@ -86,7 +86,7 @@ lexText = go choice $ Parsec.try <$> [ mathsBracket - , mathsParens + , mathInline , escape -- maths go before escape to avoid mismatch , headers , newlineToken @@ -245,8 +245,8 @@ labeledLink = do mathsBracket :: Lexer mathsBracket = delimited "\\[" "\\]" MathsBracketOpen MathsBracketClose -mathsParens :: Lexer -mathsParens = delimited "\\(" "\\)" MathsParenOpen MathsParenClose +mathInline :: Lexer +mathInline = delimited "\\(" "\\)" MathInlineOpen MathInlineClose birdTrack :: Lexer birdTrack = delimitedNoTrailing ">> " eol BirdTrack diff --git a/test/Spec.hs b/test/Spec.hs index a09489e..21f5d36 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -98,9 +98,9 @@ maths = do , (1, 12, MathsBracketClose) ] "\\(other maths\\)" - `shouldLexTo` [ (1, 1, MathsParenOpen) + `shouldLexTo` [ (1, 1, MathInlineOpen) , (1, 3, Token "other maths") - , (1, 14, MathsParenClose) + , (1, 14, MathInlineClose) ] escaping :: Expectation -- 2.49.1 From c4d59d32369a4b0b3c11fbb3e3c6915a0a362d89 Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?L=C3=A9ana=20=E6=B1=9F?= Date: Wed, 24 Sep 2025 21:33:20 +0800 Subject: [PATCH 09/13] ref(lexer): rename MathsBracket -> MathMultiline --- src/Lexer.hs | 10 +++++----- test/Spec.hs | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/Lexer.hs b/src/Lexer.hs index c2a7f01..04fa84f 100644 --- a/src/Lexer.hs +++ b/src/Lexer.hs @@ -53,8 +53,8 @@ data Token | BracketClose | MathInlineOpen | MathInlineClose - | MathsBracketOpen - | MathsBracketClose + | MathMultilineOpen + | MathMultilineClose | NumericEntity Int | Module Text | QuoteOpen @@ -85,7 +85,7 @@ lexText = go toks <- choice $ Parsec.try - <$> [ mathsBracket + <$> [ mathMultiline , mathInline , escape -- maths go before escape to avoid mismatch , headers @@ -242,8 +242,8 @@ labeledLink = do , (pos8, LabeledLinkClose) ] -mathsBracket :: Lexer -mathsBracket = delimited "\\[" "\\]" MathsBracketOpen MathsBracketClose +mathMultiline :: Lexer +mathMultiline = delimited "\\[" "\\]" MathMultilineOpen MathMultilineClose mathInline :: Lexer mathInline = delimited "\\(" "\\)" MathInlineOpen MathInlineClose diff --git a/test/Spec.hs b/test/Spec.hs index 21f5d36..7ddbcff 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -93,9 +93,9 @@ anchors = maths :: IO () maths = do "\\[some math\\]" - `shouldLexTo` [ (1, 1, MathsBracketOpen) + `shouldLexTo` [ (1, 1, MathMultilineOpen) , (1, 3, Token "some math") - , (1, 12, MathsBracketClose) + , (1, 12, MathMultilineClose) ] "\\(other maths\\)" `shouldLexTo` [ (1, 1, MathInlineOpen) -- 2.49.1 From 326c7b681cd602dbe725a111efc0254f3700b44a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9ana=20=E6=B1=9F?= Date: Wed, 24 Sep 2025 22:19:37 +0800 Subject: [PATCH 10/13] fix(lexer): old anchor is only used in moduleName --- Grammar.ebnf | 2 +- src/Lexer.hs | 19 +++++++++---------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/Grammar.ebnf b/Grammar.ebnf index bff331d..2404a67 100644 --- a/Grammar.ebnf +++ b/Grammar.ebnf @@ 
-12,7 +12,7 @@ bold ::= '__' text_no_newline '__' monospace ::= '@' text_content '@' link ::= module_link | hyperlink | markdown_link -module_link ::= '"' module_name ( '#' anchor_name )? '"' +module_link ::= '"' module_name ( ('#' | '\#') anchor_name )? '"' hyperlink ::= '<' url ( ' ' link_text )? '>' markdown_link ::= '[' link_text '](' ( url | module_link ) ')' diff --git a/src/Lexer.hs b/src/Lexer.hs index 04fa84f..9b33f03 100644 --- a/src/Lexer.hs +++ b/src/Lexer.hs @@ -94,7 +94,7 @@ lexText = go , link , labeledLink , modules - , anchors + , anchor , textElement , quotes , birdTrack @@ -152,9 +152,6 @@ delimitedSymmetric s t1 t2 = delimited s s t1 t2 eol :: Parser () eol = void "\n" <|> void "\r\n" <|> Parsec.eof -anchorHash :: Parser Text -anchorHash = "#" <|> try "\\#" - header1 :: Lexer header1 = delimitedNoTrailing "= " eol (Header One) @@ -174,11 +171,10 @@ header6 :: Lexer header6 = delimitedNoTrailing "====== " eol (Header Six) -- #anchors# -anchors :: Lexer -anchors = - tokenise - [ between anchorHash anchorHash (Anchor <$> anyUntil anchorHash) - ] +anchor :: Lexer +anchor = do + x <- located $ between "#" "#" (Anchor <$> anyUntil "#") + pure [x] moduleNames :: Parser Text @@ -192,7 +188,7 @@ identifierChar = satisfy (\c -> isAlphaNum c || c == '_') -- "Module.Name" -- "Module.Name#anchor" --- "Module.Name\#anchor" -- this has been deprecated for 9 years, thanks Ben +-- "Module.Name\#anchor" -- known as "old anchor". 
this has been deprecated for 9 years, thanks Ben modules :: Lexer modules = between (char '"') (char '"') inner where @@ -203,6 +199,9 @@ modules = between (char '"') (char '"') inner Just anchor -> [module_, anchor] Nothing -> [module_] + anchorHash :: Parser Text + anchorHash = "#" <|> try "\\#" + anchorText :: Parser Text anchorText = Text.pack <$> many (satisfy (\c -> c /= '"' && not (isSpace c))) -- 2.49.1 From 29c015b79310aa4ecd6a51f9b895ad5910bb2766 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9ana=20=E6=B1=9F?= Date: Wed, 24 Sep 2025 22:22:00 +0800 Subject: [PATCH 11/13] style(lexer): make binding naming consistent --- src/Lexer.hs | 12 ++++++------ test/Spec.hs | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/Lexer.hs b/src/Lexer.hs index 9b33f03..c44c0e4 100644 --- a/src/Lexer.hs +++ b/src/Lexer.hs @@ -93,7 +93,7 @@ lexText = go , spaceToken , link , labeledLink - , modules + , module_ , anchor , textElement , quotes @@ -189,15 +189,15 @@ identifierChar = satisfy (\c -> isAlphaNum c || c == '_') -- "Module.Name" -- "Module.Name#anchor" -- "Module.Name\#anchor" -- known as "old anchor". 
this has been deprecated for 9 years, thanks Ben -modules :: Lexer -modules = between (char '"') (char '"') inner +module_ :: Lexer +module_ = between (char '"') (char '"') inner where inner = do - module_ <- located $ Module <$> moduleNames + m <- located $ Module <$> moduleNames mAnchor <- optionMaybe (located $ anchorHash *> (Anchor <$> anchorText)) pure $ case mAnchor of - Just anchor -> [module_, anchor] - Nothing -> [module_] + Just anc -> [m, anc] + Nothing -> [m] anchorHash :: Parser Text anchorHash = "#" <|> try "\\#" diff --git a/test/Spec.hs b/test/Spec.hs index 7ddbcff..745aefa 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -19,8 +19,8 @@ main = hspec $ do describe "minimal" do it "handles unicode" unicode it "escapes" escaping - it "maths" maths - it "anchors" anchors + it "maths" math + it "anchors" anchor it "space chars" space it "bare string" someString it "emphasis" emphatic @@ -84,14 +84,14 @@ labeledLink = , (1, 35, LabeledLinkClose) ] -anchors :: Expectation -anchors = +anchor :: Expectation +anchor = "#myAnchor#" `shouldLexTo` [ (1, 1, Anchor "myAnchor") ] -maths :: IO () -maths = do +math :: IO () +math = do "\\[some math\\]" `shouldLexTo` [ (1, 1, MathMultilineOpen) , (1, 3, Token "some math") -- 2.49.1 From 2597e693f13e6e0b0e3622810cc3194c442ffdba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9ana=20=E6=B1=9F?= Date: Wed, 24 Sep 2025 22:31:59 +0800 Subject: [PATCH 12/13] ref(lexer): simplify labeledLink --- src/Lexer.hs | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/src/Lexer.hs b/src/Lexer.hs index c44c0e4..7874863 100644 --- a/src/Lexer.hs +++ b/src/Lexer.hs @@ -226,20 +226,12 @@ link = do labeledLink :: Lexer labeledLink = do - pos <- getPosition - void $ string "<" - link' <- linkRaw - pos7 <- getPosition - label' <- anyUntil $ string ">" - pos8 <- getPosition - void $ ">" - + open <- located $ LabeledLinkOpen <$ "<" + linkRes <- linkRaw + labelRes <- located $ Token <$> anyUntil ">" + close <- 
located $ LabeledLinkClose <$ ">" pure $ - (pos, LabeledLinkOpen) - : link' - <> [ (pos7, Token label') - , (pos8, LabeledLinkClose) - ] + open : linkRes <> [ labelRes , close ] mathMultiline :: Lexer mathMultiline = delimited "\\[" "\\]" MathMultilineOpen MathMultilineClose -- 2.49.1 From 970b658926e4a683f6c557539ddf750803cabd44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9ana=20=E6=B1=9F?= Date: Wed, 24 Sep 2025 22:32:35 +0800 Subject: [PATCH 13/13] chore(lexer): clean up --- src/Lexer.hs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/Lexer.hs b/src/Lexer.hs index 7874863..426a7ff 100644 --- a/src/Lexer.hs +++ b/src/Lexer.hs @@ -10,7 +10,7 @@ import Control.Monad (mfilter, void) import Data.Functor (($>)) import Data.Text (Text, intercalate) import Data.Text qualified as Text -import GHC.Unicode (isAlpha, isAlphaNum, isControl, isPrint, isSpace, isUpper) +import GHC.Unicode (isAlphaNum, isControl, isPrint, isSpace, isUpper) import ParserMonad (Parser, initialParserState) import Text.Parsec import Text.Parsec qualified as Parsec @@ -66,9 +66,6 @@ data Token located :: Parser a -> Parser (SourcePos, a) located p = (,) <$> getPosition <*> p -startPosition :: Parser a -> Parser SourcePos -startPosition = fmap fst . located - tokenise :: [Parser a] -> Parser [(SourcePos, a)] tokenise = sequence . map located -- 2.49.1