Improved lexer funcs

This commit is contained in:
Igor Ranieri 2025-09-21 10:36:25 +02:00
parent f586b90434
commit e81f1ea4f7
2 changed files with 118 additions and 67 deletions

View file

@ -6,7 +6,7 @@ module Lexer (
emphasis, emphasis,
) where ) where
import Control.Monad (mfilter) import Control.Monad (mfilter, void)
import Data.Char (isAlphaNum, isPrint) import Data.Char (isAlphaNum, isPrint)
import Data.Functor (($>)) import Data.Functor (($>))
import Data.Text (Text) import Data.Text (Text)
@ -26,6 +26,7 @@ data Token
| Anchor | Anchor
| AngleOpen | AngleOpen
| AngleClose | AngleClose
| BirdTrack
| BoldOpen | BoldOpen
| BoldClose | BoldClose
| BracketOpen | BracketOpen
@ -54,13 +55,16 @@ lexText = go
Just _ -> pure [] Just _ -> pure []
Nothing -> do Nothing -> do
toks <- toks <-
choice choice $
[ newlineToken Parsec.try
, spaceToken <$> [ newlineToken
, textElement , spaceToken
, identifier , quotes
, other , textElement
] , identifier
, birdTrack
, other
]
rest <- go rest <- go
pure (toks <> rest) pure (toks <> rest)
@ -86,31 +90,40 @@ textElement =
, angles , angles
] ]
delimited :: String -> String -> Token -> Token -> Parser [LocatedToken] delimitedMaybe :: Parser a -> Parser a -> Token -> Maybe Token -> Parser [LocatedToken]
delimited c1 c2 ot ct = do delimitedMaybe op cl ot ct = do
(_, content) <- match $ between op cl any'
pos <- getPosition pos <- getPosition
let openTok :: LocatedToken = (pos, ot) (text, content) <- match $ between op cl any'
closeTok :: LocatedToken = (pos, ct) let openTok :: LocatedToken = (setSourceColumn pos 1, ot)
res :: LocatedToken = (pos, Token content) res :: LocatedToken = (setSourceColumn pos 2, Token content)
closeToks :: [LocatedToken] = case ct of
Just close -> [(setSourceColumn pos (Text.length text), close)]
Nothing -> []
pure [openTok, res, closeTok] pure $ [openTok, res] <> closeToks
where where
op = string c1
cl = string c2
any' = Text.pack <$> manyTill anyChar (lookAhead cl) any' = Text.pack <$> manyTill anyChar (lookAhead cl)
delimited' :: String -> Token -> Token -> Parser [LocatedToken] delimited :: Parser a -> Parser a -> Token -> Token -> Parser [LocatedToken]
delimited' s t1 t2 = delimited s s t1 t2 delimited a b c d = delimitedMaybe a b c (Just d)
delimitedSymmetric :: Parser a -> Token -> Token -> Parser [LocatedToken]
delimitedSymmetric s t1 t2 = delimited s s t1 t2
birdTrack :: Lexer
birdTrack = delimitedMaybe (void ">> ") (void "\n" <|> Parsec.eof) BirdTrack Nothing
quotes :: Lexer
quotes = delimitedSymmetric "\"" QuoteOpen QuoteClose
emphasis :: Lexer emphasis :: Lexer
emphasis = delimited' "/" EmphasisOpen EmphasisClose emphasis = delimitedSymmetric "/" EmphasisOpen EmphasisClose
bold :: Lexer bold :: Lexer
bold = delimited' "__" BoldOpen BoldClose bold = delimitedSymmetric "__" BoldOpen BoldClose
monospace :: Lexer monospace :: Lexer
monospace = delimited' "@" MonospaceOpen MonospaceClose monospace = delimitedSymmetric "@" MonospaceOpen MonospaceClose
parens :: Parser [LocatedToken] parens :: Parser [LocatedToken]
parens = delimited "(" ")" ParenOpen ParenClose parens = delimited "(" ")" ParenOpen ParenClose

View file

@ -12,8 +12,87 @@ import Data.String (IsString (..))
import Data.Text (Text) import Data.Text (Text)
import Text.Parsec.Pos import Text.Parsec.Pos
shouldParseTo :: Text -> DocMarkup mod Identifier -> Expectation main :: IO ()
shouldParseTo input ast = parseText input `shouldBe` ast main = hspec $ do
describe "Lexer" do
describe "minimal" do
it "space chars" space
it "bare string" someString
it "emphasis" emphatic
it "monospace" monospace
it "bird tracks" birdTracks
it "quotes" quotes
it "ignores nesting" ignoreNesting
describe "Parser" do
it "Bold" do
"__bold__" `shouldParseTo` (DocBold (DocString "bold"))
it "Emphasis" do
"/emphasis/" `shouldParseTo` (DocEmphasis (DocString "emphasis"))
------------
-- Tests
------------
ignoreNesting :: Expectation
ignoreNesting =
">/foo/"
`shouldLexTo` [ (1, 1, Token ">/foo/")
]
birdTracks :: Expectation
birdTracks =
">> code"
`shouldLexTo` [ (1, 1, BirdTrack)
, (1, 2, Token "code")
]
quotes :: Expectation
quotes =
"\"quoted\""
`shouldLexTo` [ (1, 1, QuoteOpen)
, (1, 2, Token "quoted")
, (1, 8, QuoteClose)
]
space :: Expectation
space = do
"\n "
`shouldLexTo` [ (1, 1, Newline)
, (2, 1, Space)
]
" \n"
`shouldLexTo` [ (1, 1, Space)
, (1, 2, Newline)
]
monospace :: Expectation
monospace =
"@mono@"
`shouldLexTo` [ (1, 1, MonospaceOpen)
, (1, 2, Token "mono")
, (1, 6, MonospaceClose)
]
emphatic :: Expectation
emphatic =
"/emphatic/"
`shouldLexTo` [ (1, 1, EmphasisOpen)
, (1, 2, Token "emphatic")
, (1, 10, EmphasisClose)
]
someString :: Expectation
someString =
"some string"
`shouldLexTo` [ (1, 1, Token "some")
, (1, 5, Space)
, (1, 6, Token "string")
]
--------------
-- Helpers
--------------
type Doc id = DocMarkup () id type Doc id = DocMarkup () id
@ -28,46 +107,5 @@ shouldLexTo input expected =
actual `shouldBe` expected actual `shouldBe` expected
Left err -> expectationFailure $ "Parse error: " <> show err Left err -> expectationFailure $ "Parse error: " <> show err
main :: IO () shouldParseTo :: Text -> DocMarkup mod Identifier -> Expectation
main = hspec $ do shouldParseTo input ast = parseText input `shouldBe` ast
describe "Lexer" do
describe "minimal" do
it "bare string" someString
it "emphasis" emphatic
it "monospace" monospace
it "ignores nesting" ignoreNesting
describe "Parser" do
it "Bold" do
"__bold__" `shouldParseTo` (DocBold (DocString "bold"))
it "Emphasis" do
"/emphasis/" `shouldParseTo` (DocEmphasis (DocString "emphasis"))
monospace :: Expectation
monospace =
"@mono@"
`shouldLexTo` [ (1, 7, MonospaceOpen)
, (1, 7, Token "mono")
, (1, 7, MonospaceClose)
]
ignoreNesting :: Expectation
ignoreNesting =
">/foo/"
`shouldLexTo` [ (1, 1, Token ">/foo/")
]
emphatic :: Expectation
emphatic =
"/emphatic/"
`shouldLexTo` [ (1, 11, EmphasisOpen)
, (1, 11, Token "emphatic")
, (1, 11, EmphasisClose)
]
someString :: Expectation
someString =
"some string"
`shouldLexTo` [ (1, 1, Token "some")
, (1, 5, Space)
, (1, 6, Token "string")
]