Adjusted others, labeled link

This commit is contained in:
Igor Ranieri 2025-09-21 13:30:01 +02:00
parent 4ef8d2c28c
commit 7d4cab4857
2 changed files with 106 additions and 41 deletions

View file

@ -7,12 +7,11 @@ module Lexer (
) where ) where
import Control.Monad (mfilter, void) import Control.Monad (mfilter, void)
import Data.Char (isAlphaNum, isPrint)
import Data.Functor (($>)) import Data.Functor (($>))
import Data.Text (Text) import Data.Text (Text)
import Data.Text qualified as Text import Data.Text qualified as Text
import Debug.Trace (traceM)
import GHC.Stack (HasCallStack) import GHC.Stack (HasCallStack)
import GHC.Unicode (isControl, isPrint, isSpace)
import ParserMonad (Parser, initialParserState) import ParserMonad (Parser, initialParserState)
import Text.Parsec import Text.Parsec
import Text.Parsec qualified as Parsec import Text.Parsec qualified as Parsec
@ -22,20 +21,34 @@ type LocatedToken = (SourcePos, Token)
type Lexer = Parser [LocatedToken] type Lexer = Parser [LocatedToken]
data Level
= One
| Two
| Three
| Four
| Five
| Six
deriving (Eq, Show)
data Token data Token
= Token Text = Token Text
| Anchor | Anchor
| AngleOpen
| AngleClose
| BirdTrack | BirdTrack
| BoldOpen | BoldOpen
| BoldClose | BoldClose
| Escape
| EmphasisOpen | EmphasisOpen
| EmphasisClose | EmphasisClose
| Header Level
| MonospaceOpen | MonospaceOpen
| MonospaceClose | MonospaceClose
| Newline | Newline
| Escape | LabeledLinkOpen
| LabeledLinkClose
| ParenOpen
| ParenClose
| BracketOpen
| BracketClose
| MathsParenOpen | MathsParenOpen
| MathsParenClose | MathsParenClose
| MathsBracketOpen | MathsBracketOpen
@ -62,25 +75,18 @@ lexText = go
<$> [ mathsBracket <$> [ mathsBracket
, mathsParens , mathsParens
, escape -- maths go before escape to avoid mismatch , escape -- maths go before escape to avoid mismatch
, headers
, newlineToken , newlineToken
, spaceToken , spaceToken
, labeledLink
, quotes , quotes
, textElement , textElement
, identifier
, birdTrack , birdTrack
, other , other
] ]
rest <- go rest <- go
pure (toks <> rest) pure (toks <> rest)
match :: Parser a -> Parser (Text, a)
match p = do
input <- getInput
result <- p
input' <- getInput
let !consumed = Text.take (Text.length input - Text.length input') input
pure (consumed, result)
-- Tokens -- Tokens
textElement :: Parser [LocatedToken] textElement :: Parser [LocatedToken]
@ -90,26 +96,39 @@ textElement =
<$> [ emphasis <$> [ emphasis
, bold , bold
, monospace , monospace
, angles ]
headers :: Parser [LocatedToken]
headers =
choice $
Parsec.try
<$> [ header1
, header2
, header3
, header4
, header5
, header6
] ]
delimitedMaybe :: Parser a -> Parser a -> Token -> Maybe Token -> Parser [LocatedToken] delimitedMaybe :: Parser a -> Parser a -> Token -> Maybe Token -> Parser [LocatedToken]
delimitedMaybe op cl ot ct = do delimitedMaybe openMark closeMark openToken closeToken = do
openPos <- getPosition openPos <- getPosition
void op -- opening void openMark
tokenPos <- getPosition tokenPos <- getPosition
content <- any' content <- anyUntil closeMark
closePos <- getPosition closePos <- getPosition
void cl void closeMark
let openTok :: LocatedToken = (openPos, ot)
let openTok :: LocatedToken = (openPos, openToken)
res :: LocatedToken = (tokenPos, Token content) res :: LocatedToken = (tokenPos, Token content)
closeToks :: [LocatedToken] = case ct of closeToks :: [LocatedToken] = case closeToken of
Just close -> [(closePos, close)] Just close -> [(closePos, close)]
Nothing -> [] Nothing -> []
pure $ [openTok, res] <> closeToks pure $ [openTok, res] <> closeToks
where
any' = Text.pack <$> manyTill anyChar (lookAhead cl) anyUntil :: Parser a -> Parser Text
anyUntil p = Text.pack <$> manyTill anyChar (lookAhead p)
delimited :: Parser a -> Parser a -> Token -> Token -> Parser [LocatedToken] delimited :: Parser a -> Parser a -> Token -> Token -> Parser [LocatedToken]
delimited a b c d = delimitedMaybe a b c (Just d) delimited a b c d = delimitedMaybe a b c (Just d)
@ -120,6 +139,51 @@ delimitedSymmetric s t1 t2 = delimited s s t1 t2
eol :: Parser () eol :: Parser ()
eol = void "\n" <|> Parsec.eof eol = void "\n" <|> Parsec.eof
header1 :: Lexer
header1 = delimitedMaybe (void $ "= ") eol (Header One) Nothing
header2 :: Lexer
header2 = delimitedMaybe (void $ "== ") eol (Header Two) Nothing
header3 :: Lexer
header3 = delimitedMaybe (void $ "=== ") eol (Header Three) Nothing
header4 :: Lexer
header4 = delimitedMaybe (void $ "==== ") eol (Header Four) Nothing
header5 :: Lexer
header5 = delimitedMaybe (void $ "===== ") eol (Header Five) Nothing
header6 :: Lexer
header6 = delimitedMaybe (void $ "====== ") eol (Header Six) Nothing
labeledLink :: Lexer
labeledLink = do
pos <- getPosition
void $ string "<"
pos1 <- getPosition
void $ string "["
pos2 <- getPosition
text <- anyUntil $ Text.pack <$> string "]"
pos3 <- getPosition
void $ "]"
pos4 <- getPosition
void $ "("
pos5 <- getPosition
link <- anyUntil $ Text.pack <$> string ")"
pos6 <- getPosition
void $ ")"
pure $
[ (pos, LabeledLinkOpen)
, (pos1, BracketOpen)
, (pos2, Token text)
, (pos3, BracketClose)
, (pos4, ParenOpen)
, (pos5, Token link)
, (pos6, ParenClose)
]
mathsBracket :: Lexer mathsBracket :: Lexer
mathsBracket = delimited (void $ "\\[") (void "\\]") MathsBracketOpen MathsBracketClose mathsBracket = delimited (void $ "\\[") (void "\\]") MathsBracketOpen MathsBracketClose
@ -144,20 +208,13 @@ bold = delimitedSymmetric "__" BoldOpen BoldClose
monospace :: Lexer monospace :: Lexer
monospace = delimitedSymmetric "@" MonospaceOpen MonospaceClose monospace = delimitedSymmetric "@" MonospaceOpen MonospaceClose
-- parens :: Parser [LocatedToken]
-- parens = delimited "(" ")" ParenOpen ParenClose
-- brackets :: Lexer
-- brackets = delimited "[" "]" ParenOpen ParenClose
angles :: Parser [LocatedToken]
angles = delimited "<" ">" AngleOpen AngleClose
other :: Lexer other :: Lexer
other = do other = do
pos <- getPosition pos <- getPosition
c <- takeWhile1_ isPrint c <- takeWhile1_ isAlphaNum
pure . pure $ (pos, Token c) pure . pure $ (pos, Token c)
where
isAlphaNum c = isPrint c && not (isControl c) && not (isSpace c)
spaceToken :: Lexer spaceToken :: Lexer
spaceToken = do spaceToken = do
@ -171,12 +228,6 @@ newlineToken = do
_ <- newline _ <- newline
pure . pure $ (pos, Newline) pure . pure $ (pos, Newline)
identifier :: Lexer
identifier = do
pos <- getPosition
txt <- takeWhile1_ isAlphaNum
pure . pure $ (pos, Token txt)
------- -------
-- Helpers -- Helpers
------- -------

View file

@ -23,6 +23,7 @@ main = hspec $ do
it "bare string" someString it "bare string" someString
it "emphasis" emphatic it "emphasis" emphatic
it "monospace" monospace it "monospace" monospace
it "labeled link" labeledLink
it "bird tracks" birdTracks it "bird tracks" birdTracks
it "quotes" quotes it "quotes" quotes
it "ignores nesting" ignoreNesting it "ignores nesting" ignoreNesting
@ -37,6 +38,20 @@ main = hspec $ do
-- Tests -- Tests
------------ ------------
labeledLink :: Expectation
labeledLink =
"<[link here](http://to.here) label>"
`shouldLexTo` [ (1, 1, LabeledLinkOpen)
, (1, 2, BracketOpen)
, (1, 3, Token "link here")
, (1, 12, BracketClose)
, (1, 13, ParenOpen)
, (1, 14, Token "http://to.here")
, (1, 28, ParenClose)
, (1, 29, Space)
, (1, 30, Token "label>")
]
maths :: IO () maths :: IO ()
maths = do maths = do
"\\[some math\\]" "\\[some math\\]"
@ -60,8 +75,7 @@ escaping =
unicode :: Expectation unicode :: Expectation
unicode = unicode =
"ドラゴンクエストの冒険者🐉" "ドラゴンクエストの冒険者🐉"
`shouldLexTo` [ (1, 1, Token "ドラゴンクエストの冒険者") `shouldLexTo` [ (1, 1, Token "ドラゴンクエストの冒険者🐉")
, (1, 13, Token "🐉")
] ]
ignoreNesting :: Expectation ignoreNesting :: Expectation