ref(lexer): avoid wrapping every token lexer in `try`

...for better error messages and better performance
This commit is contained in:
Primrose 2025-09-28 12:35:56 +08:00 committed by elland
parent 1664694134
commit f66bc41ae2

View file

@ -82,27 +82,42 @@ lexText = go
Parsec.optionMaybe Parsec.eof >>= \case
Just _ -> pure []
Nothing -> do
toks <-
choice $
Parsec.try
<$> [ mathMultiline
, mathInline
, escape -- maths go before escape to avoid mismatch
, headers
toks <- topLevel
rest <- go
pure (toks <> rest)
{- FOURMOLU_DISABLE -}
topLevel =
-- Backtrack over the whole choice so that, if none of the specific lexers succeeds, we can still fall back to "other", the catch-all choice that leaves the input for the parser to deal with.
-- TODO: is this desirable? Should the lexer ever throw an error at all?
try
( choice
-- Sorted by:
-- - longest to shortest parse path
-- - highest to lowest frequency (for performance?)
-- - more exact to more freeform (a freeform lexer can also match what an exact one matches, but not vice versa)
[ spaceToken
, newlineToken
, spaceToken
, link
, try module_
, quotes
, birdTrack
-- starts with "\"
, try mathMultiline
, try mathInline
, escape
, headers
, labeledLink
, module_
, link
, anchor
, numericEntity
, textElement
, quotes
, birdTrack
, other
]
rest <- go
pure (toks <> rest)
)
<|> other
{- FOURMOLU_ENABLE -}
-- Tokens
@ -239,6 +254,7 @@ mathMultiline = delimited "\\[" "\\]" MathMultilineOpen MathMultilineClose
-- | Lexer for inline math.
--
-- Built by applying 'delimited' to the two-character strings backslash + open
-- parenthesis and backslash + close parenthesis, together with the
-- 'MathInlineOpen' and 'MathInlineClose' constructors.
--
-- NOTE(review): presumably the two strings are the opening and closing
-- delimiters and the constructors tag the corresponding tokens; how the text
-- in between is tokenised depends on 'delimited' — confirm there.
mathInline :: Lexer
mathInline = delimited "\\(" "\\)" MathInlineOpen MathInlineClose
-- | Lexer for bird tracks.
--
-- Built by applying 'delimitedNoTrailing' to the literal string made of two
-- greater-than signs followed by a single space, the 'eol' parser, and the
-- 'BirdTrack' constructor.
--
-- NOTE(review): presumably a bird track runs from that prefix up to the end
-- of the line, with the end-of-line not emitted as a token — confirm against
-- 'delimitedNoTrailing'.
--
-- TODO: make sure this starts at column 0?
birdTrack :: Lexer
birdTrack = delimitedNoTrailing ">> " eol BirdTrack