Parse links, module names

This commit is contained in:
Igor Ranieri 2025-09-21 21:00:05 +02:00
parent 7d4cab4857
commit 82dcb6913e
2 changed files with 90 additions and 17 deletions

View file

@ -8,10 +8,9 @@ module Lexer (
import Control.Monad (mfilter, void) import Control.Monad (mfilter, void)
import Data.Functor (($>)) import Data.Functor (($>))
import Data.Text (Text) import Data.Text (Text, intercalate)
import Data.Text qualified as Text import Data.Text qualified as Text
import GHC.Stack (HasCallStack) import GHC.Unicode (isAlpha, isAlphaNum, isControl, isPrint, isSpace, isUpper)
import GHC.Unicode (isControl, isPrint, isSpace)
import ParserMonad (Parser, initialParserState) import ParserMonad (Parser, initialParserState)
import Text.Parsec import Text.Parsec
import Text.Parsec qualified as Parsec import Text.Parsec qualified as Parsec
@ -53,6 +52,7 @@ data Token
| MathsParenClose | MathsParenClose
| MathsBracketOpen | MathsBracketOpen
| MathsBracketClose | MathsBracketClose
| Module
| QuoteOpen | QuoteOpen
| QuoteClose | QuoteClose
| Space | Space
@ -62,7 +62,7 @@ data Token
lexer :: String -> Either ParseError [LocatedToken] lexer :: String -> Either ParseError [LocatedToken]
lexer = Parsec.runParser lexText initialParserState "input" . Text.pack lexer = Parsec.runParser lexText initialParserState "input" . Text.pack
lexText :: (HasCallStack) => Parser [LocatedToken] lexText :: Parser [LocatedToken]
lexText = go lexText = go
where where
go = do go = do
@ -78,9 +78,11 @@ lexText = go
, headers , headers
, newlineToken , newlineToken
, spaceToken , spaceToken
, link
, labeledLink , labeledLink
, quotes , modules
, textElement , textElement
, quotes
, birdTrack , birdTrack
, other , other
] ]
@ -157,10 +159,8 @@ header5 = delimitedMaybe (void $ "===== ") eol (Header Five) Nothing
header6 :: Lexer header6 :: Lexer
header6 = delimitedMaybe (void $ "====== ") eol (Header Six) Nothing header6 = delimitedMaybe (void $ "====== ") eol (Header Six) Nothing
labeledLink :: Lexer link :: Lexer
labeledLink = do link = do
pos <- getPosition
void $ string "<"
pos1 <- getPosition pos1 <- getPosition
void $ string "[" void $ string "["
pos2 <- getPosition pos2 <- getPosition
@ -170,20 +170,66 @@ labeledLink = do
pos4 <- getPosition pos4 <- getPosition
void $ "(" void $ "("
pos5 <- getPosition pos5 <- getPosition
link <- anyUntil $ Text.pack <$> string ")" link' <- anyUntil $ Text.pack <$> string ")"
pos6 <- getPosition pos6 <- getPosition
void $ ")" void $ ")"
pure $ pure $
[ (pos, LabeledLinkOpen) [ (pos1, BracketOpen)
, (pos1, BracketOpen)
, (pos2, Token text) , (pos2, Token text)
, (pos3, BracketClose) , (pos3, BracketClose)
, (pos4, ParenOpen) , (pos4, ParenOpen)
, (pos5, Token link) , (pos5, Token link')
, (pos6, ParenClose) , (pos6, ParenClose)
] ]
-- "Module.Name"
-- "Module.Name#anchor"
-- "Module.Name#anchor"
modules :: Lexer
modules = do
pos <- getPosition
void $ char '"'
pos' <- getPosition
modName <- modId
anch <- option [] do
pos'' <- getPosition
void $ try (string "#" <|> string "\\#")
pos''' <- getPosition
a <- Text.pack <$> many (satisfy (\c -> c /= '"' && not (isSpace c)))
pure [(pos'', Anchor), (pos''', Token a)]
void $ char '"'
pure $ [(pos, Module), (pos', Token modName)] <> anch
where
modId = intercalate "." <$> (fmap Text.pack <$> (conId `sepBy1` (char '.')))
conId :: Parser String
conId =
(:)
<$> satisfy (\c -> isAlpha c && isUpper c)
<*> many1 conChar
conChar :: Parser Char
conChar = satisfy (\c -> isAlphaNum c || c == '_')
labeledLink :: Lexer
labeledLink = do
pos <- getPosition
void $ string "<"
link' <- link
pos7 <- getPosition
label' <- anyUntil $ string ">"
pos8 <- getPosition
void $ ">"
pure $
(pos, LabeledLinkOpen)
: link'
<> [ (pos7, Token label')
, (pos8, LabeledLinkClose)
]
mathsBracket :: Lexer mathsBracket :: Lexer
mathsBracket = delimited (void $ "\\[") (void "\\]") MathsBracketOpen MathsBracketClose mathsBracket = delimited (void $ "\\[") (void "\\]") MathsBracketOpen MathsBracketClose
@ -211,10 +257,10 @@ monospace = delimitedSymmetric "@" MonospaceOpen MonospaceClose
other :: Lexer other :: Lexer
other = do other = do
pos <- getPosition pos <- getPosition
c <- takeWhile1_ isAlphaNum c <- takeWhile1_ isUnicodeAlphaNum
pure . pure $ (pos, Token c) pure . pure $ (pos, Token c)
where where
isAlphaNum c = isPrint c && not (isControl c) && not (isSpace c) isUnicodeAlphaNum c = isPrint c && not (isControl c) && not (isSpace c)
spaceToken :: Lexer spaceToken :: Lexer
spaceToken = do spaceToken = do

View file

@ -24,7 +24,9 @@ main = hspec $ do
it "emphasis" emphatic it "emphasis" emphatic
it "monospace" monospace it "monospace" monospace
it "labeled link" labeledLink it "labeled link" labeledLink
it "markdown link" link
it "bird tracks" birdTracks it "bird tracks" birdTracks
it "module names" modules
it "quotes" quotes it "quotes" quotes
it "ignores nesting" ignoreNesting it "ignores nesting" ignoreNesting
@ -38,6 +40,31 @@ main = hspec $ do
-- Tests -- Tests
------------ ------------
modules :: Expectation
modules = do
"\"MyModule.Name\""
`shouldLexTo` [ (1, 1, Module)
, (1, 2, Token "MyModule.Name")
]
"\"OtherModule.Name#myAnchor\""
`shouldLexTo` [ (1, 1, Module)
, (1, 2, Token "OtherModule.Name")
, (1, 18, Anchor)
, (1, 19, Token "myAnchor")
]
link :: Expectation
link =
"[link to](http://some.website)"
`shouldLexTo` [ (1, 1, BracketOpen)
, (1, 2, Token "link to")
, (1, 9, BracketClose)
, (1, 10, ParenOpen)
, (1, 11, Token "http://some.website")
, (1, 30, ParenClose)
]
labeledLink :: Expectation labeledLink :: Expectation
labeledLink = labeledLink =
"<[link here](http://to.here) label>" "<[link here](http://to.here) label>"
@ -48,8 +75,8 @@ labeledLink =
, (1, 13, ParenOpen) , (1, 13, ParenOpen)
, (1, 14, Token "http://to.here") , (1, 14, Token "http://to.here")
, (1, 28, ParenClose) , (1, 28, ParenClose)
, (1, 29, Space) , (1, 29, Token " label")
, (1, 30, Token "label>") , (1, 35, LabeledLinkClose)
] ]
maths :: IO () maths :: IO ()