ref(lexer): attempt to not try on every token
All checks were successful
Haskell CI / build (pull_request) Successful in 2m33s
Haskell CI / test (pull_request) Successful in 2m25s
Haskell CI / fourmolu (pull_request) Successful in 6s
Haskell CI / hlint (pull_request) Successful in 5s

...for better error messages and better performance
This commit is contained in:
Primrose 2025-09-28 12:35:56 +08:00
parent 82eb8435ab
commit 449b7c8ca7
Signed by: primrose
GPG key ID: 4E887A4CA9714ADA

View file

@ -82,27 +82,42 @@ lexText = go
Parsec.optionMaybe Parsec.eof >>= \case Parsec.optionMaybe Parsec.eof >>= \case
Just _ -> pure [] Just _ -> pure []
Nothing -> do Nothing -> do
toks <- toks <- topLevel
choice $ rest <- go
Parsec.try pure (toks <> rest)
<$> [ mathMultiline
, mathInline {- FOURMOLU_DISABLE -}
, escape -- maths go before escape to avoid mismatch topLevel =
, headers -- backtracking here so we always have a chance to try "other", the "catch-all-leave-to-parser-to-deal-with" choice
-- TODO: is this desirable? Do we throw lexer errors at all?
try
( choice
-- Sorted by:
-- - longest to shortest parse path
-- - highest to lowest frequency (for performance?)
-- - more exact to more freeform (a freeform rule can match what an exact rule matches, but not vice versa)
[ spaceToken
, newlineToken , newlineToken
, spaceToken
, link , try module_
, quotes
, birdTrack
-- starts with "\"
, try mathMultiline
, try mathInline
, escape
, headers
, labeledLink , labeledLink
, module_ , link
, anchor , anchor
, numericEntity , numericEntity
, textElement , textElement
, quotes
, birdTrack
, other
] ]
rest <- go )
pure (toks <> rest) <|> other
{- FOURMOLU_ENABLE -}
-- Tokens -- Tokens
@ -239,6 +254,7 @@ mathMultiline = delimited "\\[" "\\]" MathMultilineOpen MathMultilineClose
mathInline :: Lexer mathInline :: Lexer
mathInline = delimited "\\(" "\\)" MathInlineOpen MathInlineClose mathInline = delimited "\\(" "\\)" MathInlineOpen MathInlineClose
-- TODO: make sure this starts at column 0?
birdTrack :: Lexer birdTrack :: Lexer
birdTrack = delimitedNoTrailing ">> " eol BirdTrack birdTrack = delimitedNoTrailing ">> " eol BirdTrack