From 678158d6146857de1a4db9005f9e678e443a07cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9ana=20=E6=B1=9F?= Date: Wed, 24 Sep 2025 18:35:15 +0800 Subject: [PATCH 1/2] fix(lexer): handle crlf in newline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Do we support windows 🤔 --- src/Lexer.hs | 2 +- test/Spec.hs | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/Lexer.hs b/src/Lexer.hs index e12324b..87f0ee0 100644 --- a/src/Lexer.hs +++ b/src/Lexer.hs @@ -152,7 +152,7 @@ delimitedSymmetric :: Parser a -> Token -> Token -> Parser [LocatedToken] delimitedSymmetric s t1 t2 = delimited s s t1 t2 eol :: Parser () -eol = void "\n" <|> Parsec.eof +eol = void "\n" <|> void "\r\n" <|> Parsec.eof header1 :: Lexer header1 = delimitedMaybe (void $ "= ") eol (Header One) Nothing diff --git a/test/Spec.hs b/test/Spec.hs index d202ac2..2545c20 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -106,11 +106,15 @@ maths = do ] escaping :: Expectation -escaping = +escaping = do "\\(" `shouldLexTo` [ (1, 1, Escape) , (1, 2, Token "(") ] + "\\(\r\n" + `shouldLexTo` [ (1, 1, Escape) + , (1, 2, Token "(") + ] unicode :: Expectation unicode = From 5861fd8a3f5485920641600a4d101913ad04903a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9ana=20=E6=B1=9F?= Date: Wed, 24 Sep 2025 20:15:41 +0800 Subject: [PATCH 2/2] ref(lexer): simplify delimited logic --- src/Lexer.hs | 56 +++++++++++++++++++++++++--------------------------- 1 file changed, 27 insertions(+), 29 deletions(-) diff --git a/src/Lexer.hs b/src/Lexer.hs index 87f0ee0..a4b00bb 100644 --- a/src/Lexer.hs +++ b/src/Lexer.hs @@ -16,6 +16,7 @@ import Text.Parsec import Text.Parsec qualified as Parsec import Text.Parsec.Pos (updatePosChar) +type Located a = (SourcePos, a) type LocatedToken = (SourcePos, Token) type Lexer = Parser [LocatedToken] @@ -125,28 +126,25 @@ headers = , header6 ] -delimitedMaybe :: Parser a -> Parser a -> Token -> Maybe Token -> Parser [LocatedToken] -delimitedMaybe openMark closeMark openToken closeToken = do - openPos <- getPosition - void openMark - tokenPos <- getPosition - content <- anyUntil closeMark - closePos <- getPosition - void closeMark - - let openTok :: LocatedToken = (openPos, openToken) - res :: LocatedToken = (tokenPos, Token content) - closeToks :: [LocatedToken] = case closeToken of - Just close -> [(closePos, close)] - Nothing -> [] - - pure $ [openTok, res] <> closeToks - anyUntil :: Parser a -> Parser Text anyUntil p = Text.pack <$> manyTill anyChar (lookAhead p) -delimited :: Parser a -> Parser a -> Token -> Token -> Parser [LocatedToken] -delimited a b c d = delimitedMaybe a b c (Just d) +delimitedAsTuple :: Parser open -> Parser close -> Parser (Located open, LocatedToken, Located close) +delimitedAsTuple openP closeP = + (,,) + <$> located openP + <*> located (Token <$> anyUntil closeP) + <*> located closeP + +delimited :: Parser a -> Parser b -> Token -> Token -> Parser [LocatedToken] +delimited openP closeP openTok closeTok = fuse <$> delimitedAsTuple (openTok <$ openP) (closeTok <$ closeP) + where + fuse (a, tok, b) = [a, tok, b] + +delimitedNoTrailing :: Parser a -> Parser b -> Token -> Parser [LocatedToken] +delimitedNoTrailing openP closeP openTok = fuse <$> delimitedAsTuple (openTok <$ openP) (void closeP) + where + fuse (a, tok, _) = [a, tok] delimitedSymmetric :: Parser a -> Token -> Token -> Parser [LocatedToken] delimitedSymmetric s t1 t2 = delimited s s t1 t2 @@ -155,22 +153,22 @@ eol :: Parser () eol = void "\n" <|> void "\r\n" <|> Parsec.eof header1 :: Lexer -header1 = delimitedMaybe (void $ "= ") eol (Header One) Nothing +header1 = delimitedNoTrailing "= " eol (Header One) header2 :: Lexer -header2 = delimitedMaybe (void $ "== ") eol (Header Two) Nothing +header2 = delimitedNoTrailing "== " eol (Header Two) header3 :: Lexer -header3 = delimitedMaybe (void $ "=== ") eol (Header Three) Nothing +header3 = delimitedNoTrailing "=== " eol (Header Three) header4 :: Lexer -header4 = delimitedMaybe (void $ "==== ") eol (Header Four) Nothing +header4 = delimitedNoTrailing "==== " eol (Header Four) header5 :: Lexer -header5 = delimitedMaybe (void $ "===== ") eol (Header Five) Nothing +header5 = delimitedNoTrailing "===== " eol (Header Five) header6 :: Lexer -header6 = delimitedMaybe (void $ "====== ") eol (Header Six) Nothing +header6 = delimitedNoTrailing "====== " eol (Header Six) -- #anchors# anchors :: Lexer @@ -247,16 +245,16 @@ labeledLink = do ] mathsBracket :: Lexer -mathsBracket = delimited (void $ "\\[") (void "\\]") MathsBracketOpen MathsBracketClose +mathsBracket = delimited "\\[" "\\]" MathsBracketOpen MathsBracketClose mathsParens :: Lexer -mathsParens = delimited (void $ "\\(") (void "\\)") MathsParenOpen MathsParenClose +mathsParens = delimited "\\(" "\\)" MathsParenOpen MathsParenClose birdTrack :: Lexer -birdTrack = delimitedMaybe (void ">> ") eol BirdTrack Nothing +birdTrack = delimitedNoTrailing ">> " eol BirdTrack escape :: Lexer -escape = delimitedMaybe (void "\\") eol Escape Nothing +escape = delimitedNoTrailing "\\" eol Escape quotes :: Lexer quotes = delimitedSymmetric "\"" QuoteOpen QuoteClose