Compare commits

...

1 commit

Author SHA1 Message Date
e18082db1b
ref(lexer): attempt to not try on every token
All checks were successful
Haskell CI / build (pull_request) Successful in 2m55s
Haskell CI / test (pull_request) Successful in 2m26s
Haskell CI / fourmolu (pull_request) Successful in 6s
Haskell CI / hlint (pull_request) Successful in 5s
...for a better error message and better perf
2025-09-28 12:41:23 +08:00

View file

@ -82,28 +82,43 @@ lexText = go
Parsec.optionMaybe Parsec.eof >>= \case
Just _ -> pure []
Nothing -> do
toks <-
choice $
Parsec.try
<$> [ mathMultiline
, mathInline
, escape -- maths go before escape to avoid mismatch
, headers
, newlineToken
, spaceToken
, link
, labeledLink
, module_
, anchor
, numericEntity
, textElement
, quotes
, birdTrack
, other
]
toks <- topLevel
rest <- go
pure (toks <> rest)
{- FOURMOLU_DISABLE -}
-- Lex one top-level construct: run the specific lexers below in order and,
-- if that whole group fails, fall back to "other".
topLevel =
-- The outer "try" makes a failure of the whole "choice" rewind the input, so
-- "other" (the catch-all that leaves the text for the parser to deal with)
-- always gets a chance to run from the original position.
-- TODO: is this desirable? Should the lexer ever throw an error at all?
-- NOTE(review): assuming Parsec's usual "<|>" semantics, an alternative that
-- is NOT wrapped in "try" and fails after consuming input ends the "choice"
-- right there; the alternatives listed after it are skipped and control goes
-- straight to "other". Confirm that no un-tried lexer below shares a prefix
-- with a later one.
try
( choice
-- Ordering criteria for the alternatives:
-- - longest to shortest parse path
-- - highest frequency to lowest frequency (for performance?)
-- - more exact to more freeform (a freeform lexer can also match what an
--   exact one matches, but not vice versa)
[ spaceToken
, newlineToken
-- The math lexers open with a backslash ("\[" and "\("). They are listed
-- before "escape" to avoid a mismatch, and are wrapped in "try" so that a
-- partial match rewinds and the later alternatives can still be attempted.
, try mathMultiline
, try mathInline
-- Also wrapped in "try" so a partial match does not cut the choice short.
, try module_
, quotes
, birdTrack
, escape
, headers
, labeledLink
, link
, anchor
, numericEntity
, textElement
]
)
<|> other
{- FOURMOLU_ENABLE -}
-- Tokens
textElement :: Parser [LocatedToken]
@ -239,6 +254,7 @@ mathMultiline = delimited "\\[" "\\]" MathMultilineOpen MathMultilineClose
-- | Lexer for inline math: hands the opening delimiter @\\(@ and the closing
-- delimiter @\\)@ to 'delimited', together with the 'MathInlineOpen' and
-- 'MathInlineClose' constructors.
--
-- NOTE(review): presumably yields open/close tokens around the enclosed
-- text; 'delimited' is defined elsewhere, so confirm against its definition.
mathInline :: Lexer
mathInline = delimited "\\(" "\\)" MathInlineOpen MathInlineClose
-- TODO: make sure this starts at column 0?

-- | Lexer for bird tracks: hands the opening marker @">> "@, the 'eol'
-- parser as terminator, and the 'BirdTrack' constructor to
-- 'delimitedNoTrailing'.
--
-- NOTE(review): judging by the helper's name, no token is presumably emitted
-- for the terminator; 'delimitedNoTrailing' is defined elsewhere, so confirm.
birdTrack :: Lexer
birdTrack = delimitedNoTrailing ">> " eol BirdTrack