Compare commits

...
Sign in to create a new pull request.

3 commits

Author SHA1 Message Date
3b5c904239 Trying something to get rid of so much try/choice.
Some checks failed
Haskell CI / build (pull_request) Successful in 2m49s
Haskell CI / test (pull_request) Successful in 2m28s
Haskell CI / fourmolu (pull_request) Successful in 7s
Haskell CI / hlint (pull_request) Failing after 5s
2025-10-05 11:42:47 +00:00
eb431a9c97 Added expression eval; adjusted birdtrack, added sol combinator. 2025-10-05 11:42:47 +00:00
6d4a941178 ref(lexer): attempt to not try on every token
...for a better error message and better perf
2025-10-05 11:42:47 +00:00
2 changed files with 84 additions and 28 deletions

View file

@ -7,7 +7,7 @@ module Lexer (
) )
where where
import Control.Monad (mfilter, void) import Control.Monad (guard, mfilter, void)
import Data.Char (ord, toLower) import Data.Char (ord, toLower)
import Data.Functor (($>)) import Data.Functor (($>))
import Data.Text (Text, intercalate) import Data.Text (Text, intercalate)
@ -42,6 +42,7 @@ data Token
| Escape | Escape
| EmphasisOpen | EmphasisOpen
| EmphasisClose | EmphasisClose
| Expression
| Header Level | Header Level
| MonospaceOpen | MonospaceOpen
| MonospaceClose | MonospaceClose
@ -82,28 +83,44 @@ lexText = go
Parsec.optionMaybe Parsec.eof >>= \case Parsec.optionMaybe Parsec.eof >>= \case
Just _ -> pure [] Just _ -> pure []
Nothing -> do Nothing -> do
toks <- toks <- topLevel
choice $
Parsec.try
<$> [ mathMultiline
, mathInline
, escape -- maths go before escape to avoid mismatch
, headers
, newlineToken
, spaceToken
, link
, labeledLink
, module_
, anchor
, numericEntity
, textElement
, quotes
, birdTrack
, other
]
rest <- go rest <- go
pure (toks <> rest) pure (toks <> rest)
topLevel = do
-- check for start-of-line markup first
lineStart <-
optionMaybe $
choice
[ try expression
, try birdTrack
, headers
]
case lineStart of
Just toks -> pure toks
Nothing ->
choice $
-- Sorted in
-- - longest to shortest parse path
-- - highest frequency to lowest frequency (for performance?)
-- - more exact to more freeform (the latter can be the former but not vice versa)
[ spaceToken
, newlineToken
, try module_
, quotes
, -- starts with "\"
try mathMultiline
, try mathInline
, escape
, labeledLink
, link
, anchor
, numericEntity
, textElement
, other
]
-- Tokens -- Tokens
textElement :: Parser [LocatedToken] textElement :: Parser [LocatedToken]
@ -143,16 +160,22 @@ delimited openP closeP openTok closeTok = asList <$> delimitedAsTuple (openTok <
asList (a, tok, b) = [a, tok, b] asList (a, tok, b) = [a, tok, b]
delimitedNoTrailing :: Parser open -> Parser close -> Token -> Parser [LocatedToken] delimitedNoTrailing :: Parser open -> Parser close -> Token -> Parser [LocatedToken]
delimitedNoTrailing openP closeP openTok = asList <$> delimitedAsTuple (openTok <$ openP) (void closeP) delimitedNoTrailing openP closeP openTok =
asList <$> delimitedAsTuple (openTok <$ openP) (void closeP)
where where
asList (a, tok, _) = [a, tok] asList (a, tok, _) = [a, tok]
delimitedSymmetric :: Parser a -> Token -> Token -> Parser [LocatedToken] delimitedSymmetric :: Parser a -> Token -> Token -> Parser [LocatedToken]
delimitedSymmetric s = delimited s s delimitedSymmetric s = delimited s s
--- End of line // end of file
eol :: Parser () eol :: Parser ()
eol = void "\n" <|> void "\r\n" <|> Parsec.eof eol = void "\n" <|> void "\r\n" <|> Parsec.eof
-- Start of line // start of file
sol :: Parser ()
sol = getPosition >>= guard . (== 1) . sourceColumn
header1 :: Lexer header1 :: Lexer
header1 = delimitedNoTrailing "= " eol (Header One) header1 = delimitedNoTrailing "= " eol (Header One)
@ -239,8 +262,15 @@ mathMultiline = delimited "\\[" "\\]" MathMultilineOpen MathMultilineClose
mathInline :: Lexer mathInline :: Lexer
mathInline = delimited "\\(" "\\)" MathInlineOpen MathInlineClose mathInline = delimited "\\(" "\\)" MathInlineOpen MathInlineClose
-- TODO: make sure this starts at column 0?
birdTrack :: Lexer birdTrack :: Lexer
birdTrack = delimitedNoTrailing ">> " eol BirdTrack birdTrack = delimitedNoTrailing (sol <* "> ") eol BirdTrack
-- TODO: also match following lines iff:
-- they start with alphanum
-- they're not empty
expression :: Lexer
expression = delimitedNoTrailing (sol <* ">>> ") eol Expression
escape :: Lexer escape :: Lexer
escape = delimitedNoTrailing "\\" eol Escape escape = delimitedNoTrailing "\\" eol Escape

View file

@ -18,7 +18,7 @@ main = hspec $ do
describe "minimal" do describe "minimal" do
it "handles unicode" unicode it "handles unicode" unicode
it "escapes" escaping it "escapes" escaping
it "maths" math it "maths" maths
it "anchors" anchor it "anchors" anchor
it "space chars" space it "space chars" space
it "bare string" someString it "bare string" someString
@ -29,6 +29,7 @@ main = hspec $ do
it "bird tracks" birdTracks it "bird tracks" birdTracks
it "module names" modules it "module names" modules
it "quotes" quotes it "quotes" quotes
it "expressions" expressions
it "numeric entity" numericEntity it "numeric entity" numericEntity
it "ignores nesting" ignoreNesting it "ignores nesting" ignoreNesting
@ -91,8 +92,8 @@ anchor =
`shouldLexTo` [ (1, 1, Anchor "myAnchor") `shouldLexTo` [ (1, 1, Anchor "myAnchor")
] ]
math :: IO () maths :: IO ()
math = do maths = do
"\\[some math\\]" "\\[some math\\]"
`shouldLexTo` [ (1, 1, MathMultilineOpen) `shouldLexTo` [ (1, 1, MathMultilineOpen)
, (1, 3, Token "some math") , (1, 3, Token "some math")
@ -128,10 +129,35 @@ ignoreNesting =
] ]
birdTracks :: Expectation birdTracks :: Expectation
birdTracks = birdTracks = do
">> code" "> code line"
`shouldLexTo` [ (1, 1, BirdTrack) `shouldLexTo` [ (1, 1, BirdTrack)
, (1, 4, Token "code") , (1, 3, Token "code line")
]
" > not code"
`shouldLexTo` [ (1, 1, Space)
, (1, 2, Token ">")
, (1, 3, Space)
, (1, 4, Token "not")
, (1, 7, Space)
, (1, 8, Token "code")
]
expressions :: Expectation
expressions = do
">>> eval this"
`shouldLexTo` [ (1, 1, Expression)
, (1, 5, Token "eval this")
]
" >>> not eval this"
`shouldLexTo` [ (1, 1, Space)
, (1, 2, Token ">>>")
, (1, 5, Space)
, (1, 6, Token "not")
, (1, 9, Space)
, (1, 10, Token "eval")
, (1, 14, Space)
, (1, 15, Token "this")
] ]
quotes :: Expectation quotes :: Expectation