Parse links, module names

This commit is contained in:
Igor Ranieri 2025-09-21 21:00:05 +02:00
parent 7d4cab4857
commit 82dcb6913e
2 changed files with 90 additions and 17 deletions

View file

@ -8,10 +8,9 @@ module Lexer (
import Control.Monad (mfilter, void)
import Data.Functor (($>))
import Data.Text (Text)
import Data.Text (Text, intercalate)
import Data.Text qualified as Text
import GHC.Stack (HasCallStack)
import GHC.Unicode (isControl, isPrint, isSpace)
import GHC.Unicode (isAlpha, isAlphaNum, isControl, isPrint, isSpace, isUpper)
import ParserMonad (Parser, initialParserState)
import Text.Parsec
import Text.Parsec qualified as Parsec
@ -53,6 +52,7 @@ data Token
| MathsParenClose
| MathsBracketOpen
| MathsBracketClose
| Module
| QuoteOpen
| QuoteClose
| Space
@ -62,7 +62,7 @@ data Token
lexer :: String -> Either ParseError [LocatedToken]
lexer = Parsec.runParser lexText initialParserState "input" . Text.pack
lexText :: (HasCallStack) => Parser [LocatedToken]
lexText :: Parser [LocatedToken]
lexText = go
where
go = do
@ -78,9 +78,11 @@ lexText = go
, headers
, newlineToken
, spaceToken
, link
, labeledLink
, quotes
, modules
, textElement
, quotes
, birdTrack
, other
]
@ -157,10 +159,8 @@ header5 = delimitedMaybe (void $ "===== ") eol (Header Five) Nothing
header6 :: Lexer
header6 = delimitedMaybe (void $ "====== ") eol (Header Six) Nothing
labeledLink :: Lexer
labeledLink = do
pos <- getPosition
void $ string "<"
link :: Lexer
link = do
pos1 <- getPosition
void $ string "["
pos2 <- getPosition
@ -170,20 +170,66 @@ labeledLink = do
pos4 <- getPosition
void $ "("
pos5 <- getPosition
link <- anyUntil $ Text.pack <$> string ")"
link' <- anyUntil $ Text.pack <$> string ")"
pos6 <- getPosition
void $ ")"
pure $
[ (pos, LabeledLinkOpen)
, (pos1, BracketOpen)
[ (pos1, BracketOpen)
, (pos2, Token text)
, (pos3, BracketClose)
, (pos4, ParenOpen)
, (pos5, Token link)
, (pos5, Token link')
, (pos6, ParenClose)
]
-- "Module.Name"
-- "Module.Name#anchor"
-- "Module.Name#anchor"
modules :: Lexer
modules = do
pos <- getPosition
void $ char '"'
pos' <- getPosition
modName <- modId
anch <- option [] do
pos'' <- getPosition
void $ try (string "#" <|> string "\\#")
pos''' <- getPosition
a <- Text.pack <$> many (satisfy (\c -> c /= '"' && not (isSpace c)))
pure [(pos'', Anchor), (pos''', Token a)]
void $ char '"'
pure $ [(pos, Module), (pos', Token modName)] <> anch
where
modId = intercalate "." <$> (fmap Text.pack <$> (conId `sepBy1` (char '.')))
conId :: Parser String
conId =
(:)
<$> satisfy (\c -> isAlpha c && isUpper c)
<*> many1 conChar
conChar :: Parser Char
conChar = satisfy (\c -> isAlphaNum c || c == '_')
labeledLink :: Lexer
labeledLink = do
pos <- getPosition
void $ string "<"
link' <- link
pos7 <- getPosition
label' <- anyUntil $ string ">"
pos8 <- getPosition
void $ ">"
pure $
(pos, LabeledLinkOpen)
: link'
<> [ (pos7, Token label')
, (pos8, LabeledLinkClose)
]
mathsBracket :: Lexer
mathsBracket = delimited (void $ "\\[") (void "\\]") MathsBracketOpen MathsBracketClose
@ -211,10 +257,10 @@ monospace = delimitedSymmetric "@" MonospaceOpen MonospaceClose
other :: Lexer
other = do
pos <- getPosition
c <- takeWhile1_ isAlphaNum
c <- takeWhile1_ isUnicodeAlphaNum
pure . pure $ (pos, Token c)
where
isAlphaNum c = isPrint c && not (isControl c) && not (isSpace c)
isUnicodeAlphaNum c = isPrint c && not (isControl c) && not (isSpace c)
spaceToken :: Lexer
spaceToken = do

View file

@ -24,7 +24,9 @@ main = hspec $ do
it "emphasis" emphatic
it "monospace" monospace
it "labeled link" labeledLink
it "markdown link" link
it "bird tracks" birdTracks
it "module names" modules
it "quotes" quotes
it "ignores nesting" ignoreNesting
@ -38,6 +40,31 @@ main = hspec $ do
-- Tests
------------
modules :: Expectation
modules = do
"\"MyModule.Name\""
`shouldLexTo` [ (1, 1, Module)
, (1, 2, Token "MyModule.Name")
]
"\"OtherModule.Name#myAnchor\""
`shouldLexTo` [ (1, 1, Module)
, (1, 2, Token "OtherModule.Name")
, (1, 18, Anchor)
, (1, 19, Token "myAnchor")
]
link :: Expectation
link =
"[link to](http://some.website)"
`shouldLexTo` [ (1, 1, BracketOpen)
, (1, 2, Token "link to")
, (1, 9, BracketClose)
, (1, 10, ParenOpen)
, (1, 11, Token "http://some.website")
, (1, 30, ParenClose)
]
labeledLink :: Expectation
labeledLink =
"<[link here](http://to.here) label>"
@ -48,8 +75,8 @@ labeledLink =
, (1, 13, ParenOpen)
, (1, 14, Token "http://to.here")
, (1, 28, ParenClose)
, (1, 29, Space)
, (1, 30, Token "label>")
, (1, 29, Token " label")
, (1, 35, LabeledLinkClose)
]
maths :: IO ()