From 82dcb6913e04b203c965cdde59bbd3b15613fd3e Mon Sep 17 00:00:00 2001 From: Igor Ranieri Date: Sun, 21 Sep 2025 21:00:05 +0200 Subject: [PATCH] Parse links, module names --- src/Lexer.hs | 76 +++++++++++++++++++++++++++++++++++++++++----------- test/Spec.hs | 31 +++++++++++++++++++-- 2 files changed, 90 insertions(+), 17 deletions(-) diff --git a/src/Lexer.hs b/src/Lexer.hs index 438ac0e..3346389 100644 --- a/src/Lexer.hs +++ b/src/Lexer.hs @@ -8,10 +8,9 @@ module Lexer ( import Control.Monad (mfilter, void) import Data.Functor (($>)) -import Data.Text (Text) +import Data.Text (Text, intercalate) import Data.Text qualified as Text -import GHC.Stack (HasCallStack) -import GHC.Unicode (isControl, isPrint, isSpace) +import GHC.Unicode (isAlpha, isAlphaNum, isControl, isPrint, isSpace, isUpper) import ParserMonad (Parser, initialParserState) import Text.Parsec import Text.Parsec qualified as Parsec @@ -53,6 +52,7 @@ data Token | MathsParenClose | MathsBracketOpen | MathsBracketClose + | Module | QuoteOpen | QuoteClose | Space @@ -62,7 +62,7 @@ data Token lexer :: String -> Either ParseError [LocatedToken] lexer = Parsec.runParser lexText initialParserState "input" . Text.pack -lexText :: (HasCallStack) => Parser [LocatedToken] +lexText :: Parser [LocatedToken] lexText = go where go = do @@ -78,9 +78,11 @@ lexText = go , headers , newlineToken , spaceToken + , link , labeledLink - , quotes + , modules , textElement + , quotes , birdTrack , other ] @@ -157,10 +159,8 @@ header5 = delimitedMaybe (void $ "===== ") eol (Header Five) Nothing header6 :: Lexer header6 = delimitedMaybe (void $ "====== ") eol (Header Six) Nothing -labeledLink :: Lexer -labeledLink = do - pos <- getPosition - void $ string "<" +link :: Lexer +link = do pos1 <- getPosition void $ string "[" pos2 <- getPosition @@ -170,20 +170,66 @@ labeledLink = do pos4 <- getPosition void $ "(" pos5 <- getPosition - link <- anyUntil $ Text.pack <$> string ")" + link' <- anyUntil $ Text.pack <$> string ")" pos6 <- getPosition void $ ")" pure $ - [ (pos, LabeledLinkOpen) - , (pos1, BracketOpen) + [ (pos1, BracketOpen) , (pos2, Token text) , (pos3, BracketClose) , (pos4, ParenOpen) - , (pos5, Token link) + , (pos5, Token link') , (pos6, ParenClose) ] +-- "Module.Name" +-- "Module.Name#anchor" +-- "Module.Name#anchor" +modules :: Lexer +modules = do + pos <- getPosition + void $ char '"' + pos' <- getPosition + modName <- modId + anch <- option [] do + pos'' <- getPosition + void $ try (string "#" <|> string "\\#") + pos''' <- getPosition + a <- Text.pack <$> many (satisfy (\c -> c /= '"' && not (isSpace c))) + pure [(pos'', Anchor), (pos''', Token a)] + + void $ char '"' + pure $ [(pos, Module), (pos', Token modName)] <> anch + where + modId = intercalate "." <$> (fmap Text.pack <$> (conId `sepBy1` (char '.'))) + + conId :: Parser String + conId = + (:) + <$> satisfy (\c -> isAlpha c && isUpper c) + <*> many1 conChar + + conChar :: Parser Char + conChar = satisfy (\c -> isAlphaNum c || c == '_') + +labeledLink :: Lexer +labeledLink = do + pos <- getPosition + void $ string "<" + link' <- link + pos7 <- getPosition + label' <- anyUntil $ string ">" + pos8 <- getPosition + void $ ">" + + pure $ + (pos, LabeledLinkOpen) + : link' + <> [ (pos7, Token label') + , (pos8, LabeledLinkClose) + ] + mathsBracket :: Lexer mathsBracket = delimited (void $ "\\[") (void "\\]") MathsBracketOpen MathsBracketClose @@ -211,10 +257,10 @@ monospace = delimitedSymmetric "@" MonospaceOpen MonospaceClose other :: Lexer other = do pos <- getPosition - c <- takeWhile1_ isAlphaNum + c <- takeWhile1_ isUnicodeAlphaNum pure . pure $ (pos, Token c) where - isAlphaNum c = isPrint c && not (isControl c) && not (isSpace c) + isUnicodeAlphaNum c = isPrint c && not (isControl c) && not (isSpace c) spaceToken :: Lexer spaceToken = do diff --git a/test/Spec.hs b/test/Spec.hs index ce09fe0..2474046 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -24,7 +24,9 @@ main = hspec $ do it "emphasis" emphatic it "monospace" monospace it "labeled link" labeledLink + it "markdown link" link it "bird tracks" birdTracks + it "module names" modules it "quotes" quotes it "ignores nesting" ignoreNesting @@ -38,6 +40,31 @@ main = hspec $ do -- Tests ------------ +modules :: Expectation +modules = do + "\"MyModule.Name\"" + `shouldLexTo` [ (1, 1, Module) + , (1, 2, Token "MyModule.Name") + ] + + "\"OtherModule.Name#myAnchor\"" + `shouldLexTo` [ (1, 1, Module) + , (1, 2, Token "OtherModule.Name") + , (1, 18, Anchor) + , (1, 19, Token "myAnchor") + ] + +link :: Expectation +link = + "[link to](http://some.website)" + `shouldLexTo` [ (1, 1, BracketOpen) + , (1, 2, Token "link to") + , (1, 9, BracketClose) + , (1, 10, ParenOpen) + , (1, 11, Token "http://some.website") + , (1, 30, ParenClose) + ] + labeledLink :: Expectation labeledLink = "<[link here](http://to.here) label>" @@ -48,8 +75,8 @@ labeledLink = , (1, 13, ParenOpen) , (1, 14, Token "http://to.here") , (1, 28, ParenClose) - , (1, 29, Space) - , (1, 30, Token "label>") + , (1, 29, Token " label") + , (1, 35, LabeledLinkClose) ] maths :: IO ()