Compare commits

..

2 commits

Author SHA1 Message Date
5861fd8a3f
ref(lexer): simplify delimited logic 2025-09-24 20:16:39 +08:00
678158d614
fix(lexer): handle crlf in newline
Do we support windows 🤔
2025-09-24 18:35:15 +08:00
2 changed files with 33 additions and 31 deletions

View file

@ -16,6 +16,7 @@ import Text.Parsec
import Text.Parsec qualified as Parsec import Text.Parsec qualified as Parsec
import Text.Parsec.Pos (updatePosChar) import Text.Parsec.Pos (updatePosChar)
type Located a = (SourcePos, a)
type LocatedToken = (SourcePos, Token) type LocatedToken = (SourcePos, Token)
type Lexer = Parser [LocatedToken] type Lexer = Parser [LocatedToken]
@ -125,52 +126,49 @@ headers =
, header6 , header6
] ]
delimitedMaybe :: Parser a -> Parser a -> Token -> Maybe Token -> Parser [LocatedToken]
delimitedMaybe openMark closeMark openToken closeToken = do
openPos <- getPosition
void openMark
tokenPos <- getPosition
content <- anyUntil closeMark
closePos <- getPosition
void closeMark
let openTok :: LocatedToken = (openPos, openToken)
res :: LocatedToken = (tokenPos, Token content)
closeToks :: [LocatedToken] = case closeToken of
Just close -> [(closePos, close)]
Nothing -> []
pure $ [openTok, res] <> closeToks
anyUntil :: Parser a -> Parser Text anyUntil :: Parser a -> Parser Text
anyUntil p = Text.pack <$> manyTill anyChar (lookAhead p) anyUntil p = Text.pack <$> manyTill anyChar (lookAhead p)
delimited :: Parser a -> Parser a -> Token -> Token -> Parser [LocatedToken] delimitedAsTuple :: Parser open -> Parser close -> Parser (Located open, LocatedToken, Located close)
delimited a b c d = delimitedMaybe a b c (Just d) delimitedAsTuple openP closeP =
(,,)
<$> located openP
<*> located (Token <$> anyUntil closeP)
<*> located closeP
delimited :: Parser a -> Parser b -> Token -> Token -> Parser [LocatedToken]
delimited openP closeP openTok closeTok = fuse <$> delimitedAsTuple (openTok <$ openP) (closeTok <$ closeP)
where
fuse (a, tok, b) = [a, tok, b]
delimitedNoTrailing :: Parser a -> Parser b -> Token -> Parser [LocatedToken]
delimitedNoTrailing openP closeP openTok = fuse <$> delimitedAsTuple (openTok <$ openP) (void closeP)
where
fuse (a, tok, _) = [a, tok]
delimitedSymmetric :: Parser a -> Token -> Token -> Parser [LocatedToken] delimitedSymmetric :: Parser a -> Token -> Token -> Parser [LocatedToken]
delimitedSymmetric s t1 t2 = delimited s s t1 t2 delimitedSymmetric s t1 t2 = delimited s s t1 t2
eol :: Parser () eol :: Parser ()
eol = void "\n" <|> Parsec.eof eol = void "\n" <|> void "\r\n" <|> Parsec.eof
header1 :: Lexer header1 :: Lexer
header1 = delimitedMaybe (void $ "= ") eol (Header One) Nothing header1 = delimitedNoTrailing "= " eol (Header One)
header2 :: Lexer header2 :: Lexer
header2 = delimitedMaybe (void $ "== ") eol (Header Two) Nothing header2 = delimitedNoTrailing "== " eol (Header Two)
header3 :: Lexer header3 :: Lexer
header3 = delimitedMaybe (void $ "=== ") eol (Header Three) Nothing header3 = delimitedNoTrailing "=== " eol (Header Three)
header4 :: Lexer header4 :: Lexer
header4 = delimitedMaybe (void $ "==== ") eol (Header Four) Nothing header4 = delimitedNoTrailing "==== " eol (Header Four)
header5 :: Lexer header5 :: Lexer
header5 = delimitedMaybe (void $ "===== ") eol (Header Five) Nothing header5 = delimitedNoTrailing "===== " eol (Header Five)
header6 :: Lexer header6 :: Lexer
header6 = delimitedMaybe (void $ "====== ") eol (Header Six) Nothing header6 = delimitedNoTrailing "====== " eol (Header Six)
-- #anchors# -- #anchors#
anchors :: Lexer anchors :: Lexer
@ -247,16 +245,16 @@ labeledLink = do
] ]
mathsBracket :: Lexer mathsBracket :: Lexer
mathsBracket = delimited (void $ "\\[") (void "\\]") MathsBracketOpen MathsBracketClose mathsBracket = delimited "\\[" "\\]" MathsBracketOpen MathsBracketClose
mathsParens :: Lexer mathsParens :: Lexer
mathsParens = delimited (void $ "\\(") (void "\\)") MathsParenOpen MathsParenClose mathsParens = delimited "\\(" "\\)" MathsParenOpen MathsParenClose
birdTrack :: Lexer birdTrack :: Lexer
birdTrack = delimitedMaybe (void ">> ") eol BirdTrack Nothing birdTrack = delimitedNoTrailing ">> " eol BirdTrack
escape :: Lexer escape :: Lexer
escape = delimitedMaybe (void "\\") eol Escape Nothing escape = delimitedNoTrailing "\\" eol Escape
quotes :: Lexer quotes :: Lexer
quotes = delimitedSymmetric "\"" QuoteOpen QuoteClose quotes = delimitedSymmetric "\"" QuoteOpen QuoteClose

View file

@ -106,11 +106,15 @@ maths = do
] ]
escaping :: Expectation escaping :: Expectation
escaping = escaping = do
"\\(" "\\("
`shouldLexTo` [ (1, 1, Escape) `shouldLexTo` [ (1, 1, Escape)
, (1, 2, Token "(") , (1, 2, Token "(")
] ]
"\\(\r\n"
`shouldLexTo` [ (1, 1, Escape)
, (1, 2, Token "(")
]
unicode :: Expectation unicode :: Expectation
unicode = unicode =