Adjusted others, labeled link

This commit is contained in:
Igor Ranieri 2025-09-21 13:30:01 +02:00
parent 4ef8d2c28c
commit 7d4cab4857
2 changed files with 106 additions and 41 deletions

View file

@ -7,12 +7,11 @@ module Lexer (
) where
import Control.Monad (mfilter, void)
import Data.Char (isAlphaNum, isPrint)
import Data.Functor (($>))
import Data.Text (Text)
import Data.Text qualified as Text
import Debug.Trace (traceM)
import GHC.Stack (HasCallStack)
import GHC.Unicode (isControl, isPrint, isSpace)
import ParserMonad (Parser, initialParserState)
import Text.Parsec
import Text.Parsec qualified as Parsec
@ -22,20 +21,34 @@ type LocatedToken = (SourcePos, Token)
type Lexer = Parser [LocatedToken]
data Level
= One
| Two
| Three
| Four
| Five
| Six
deriving (Eq, Show)
data Token
= Token Text
| Anchor
| AngleOpen
| AngleClose
| BirdTrack
| BoldOpen
| BoldClose
| Escape
| EmphasisOpen
| EmphasisClose
| Header Level
| MonospaceOpen
| MonospaceClose
| Newline
| Escape
| LabeledLinkOpen
| LabeledLinkClose
| ParenOpen
| ParenClose
| BracketOpen
| BracketClose
| MathsParenOpen
| MathsParenClose
| MathsBracketOpen
@ -62,25 +75,18 @@ lexText = go
<$> [ mathsBracket
, mathsParens
, escape -- maths go before escape to avoid mismatch
, headers
, newlineToken
, spaceToken
, labeledLink
, quotes
, textElement
, identifier
, birdTrack
, other
]
rest <- go
pure (toks <> rest)
match :: Parser a -> Parser (Text, a)
match p = do
input <- getInput
result <- p
input' <- getInput
let !consumed = Text.take (Text.length input - Text.length input') input
pure (consumed, result)
-- Tokens
textElement :: Parser [LocatedToken]
@ -90,26 +96,39 @@ textElement =
<$> [ emphasis
, bold
, monospace
, angles
]
headers :: Parser [LocatedToken]
headers =
choice $
Parsec.try
<$> [ header1
, header2
, header3
, header4
, header5
, header6
]
delimitedMaybe :: Parser a -> Parser a -> Token -> Maybe Token -> Parser [LocatedToken]
delimitedMaybe op cl ot ct = do
delimitedMaybe openMark closeMark openToken closeToken = do
openPos <- getPosition
void op -- opening
void openMark
tokenPos <- getPosition
content <- any'
content <- anyUntil closeMark
closePos <- getPosition
void cl
let openTok :: LocatedToken = (openPos, ot)
void closeMark
let openTok :: LocatedToken = (openPos, openToken)
res :: LocatedToken = (tokenPos, Token content)
closeToks :: [LocatedToken] = case ct of
closeToks :: [LocatedToken] = case closeToken of
Just close -> [(closePos, close)]
Nothing -> []
pure $ [openTok, res] <> closeToks
where
any' = Text.pack <$> manyTill anyChar (lookAhead cl)
anyUntil :: Parser a -> Parser Text
anyUntil p = Text.pack <$> manyTill anyChar (lookAhead p)
delimited :: Parser a -> Parser a -> Token -> Token -> Parser [LocatedToken]
delimited a b c d = delimitedMaybe a b c (Just d)
@ -120,6 +139,51 @@ delimitedSymmetric s t1 t2 = delimited s s t1 t2
eol :: Parser ()
eol = void "\n" <|> Parsec.eof
header1 :: Lexer
header1 = delimitedMaybe (void $ "= ") eol (Header One) Nothing
header2 :: Lexer
header2 = delimitedMaybe (void $ "== ") eol (Header Two) Nothing
header3 :: Lexer
header3 = delimitedMaybe (void $ "=== ") eol (Header Three) Nothing
header4 :: Lexer
header4 = delimitedMaybe (void $ "==== ") eol (Header Four) Nothing
header5 :: Lexer
header5 = delimitedMaybe (void $ "===== ") eol (Header Five) Nothing
header6 :: Lexer
header6 = delimitedMaybe (void $ "====== ") eol (Header Six) Nothing
labeledLink :: Lexer
labeledLink = do
pos <- getPosition
void $ string "<"
pos1 <- getPosition
void $ string "["
pos2 <- getPosition
text <- anyUntil $ Text.pack <$> string "]"
pos3 <- getPosition
void $ "]"
pos4 <- getPosition
void $ "("
pos5 <- getPosition
link <- anyUntil $ Text.pack <$> string ")"
pos6 <- getPosition
void $ ")"
pure $
[ (pos, LabeledLinkOpen)
, (pos1, BracketOpen)
, (pos2, Token text)
, (pos3, BracketClose)
, (pos4, ParenOpen)
, (pos5, Token link)
, (pos6, ParenClose)
]
mathsBracket :: Lexer
mathsBracket = delimited (void $ "\\[") (void "\\]") MathsBracketOpen MathsBracketClose
@ -144,20 +208,13 @@ bold = delimitedSymmetric "__" BoldOpen BoldClose
monospace :: Lexer
monospace = delimitedSymmetric "@" MonospaceOpen MonospaceClose
-- parens :: Parser [LocatedToken]
-- parens = delimited "(" ")" ParenOpen ParenClose
-- brackets :: Lexer
-- brackets = delimited "[" "]" ParenOpen ParenClose
angles :: Parser [LocatedToken]
angles = delimited "<" ">" AngleOpen AngleClose
other :: Lexer
other = do
pos <- getPosition
c <- takeWhile1_ isPrint
c <- takeWhile1_ isAlphaNum
pure . pure $ (pos, Token c)
where
isAlphaNum c = isPrint c && not (isControl c) && not (isSpace c)
spaceToken :: Lexer
spaceToken = do
@ -171,12 +228,6 @@ newlineToken = do
_ <- newline
pure . pure $ (pos, Newline)
identifier :: Lexer
identifier = do
pos <- getPosition
txt <- takeWhile1_ isAlphaNum
pure . pure $ (pos, Token txt)
-------
-- Helpers
-------