ref(lexer): attempt to not try on every token for a better error message

Primrose 2025-09-28 12:35:56 +08:00
parent 82eb8435ab
commit 8261866ef2
Signed by: primrose
GPG key ID: 4E887A4CA9714ADA


@@ -83,24 +83,32 @@ lexText = go
             Just _ -> pure []
             Nothing -> do
                 toks <-
-                    choice $
-                        Parsec.try
-                            <$> [ mathMultiline
-                                , mathInline
-                                , escape -- maths go before escape to avoid mismatch
-                                , headers
-                                , newlineToken
-                                , spaceToken
-                                , link
-                                , labeledLink
-                                , module_
-                                , anchor
-                                , numericEntity
-                                , textElement
-                                , quotes
-                                , birdTrack
-                                , other
-                                ]
+                    -- backtracking here so we always have a chance to try "other", the "catch-all-leave-to-parser-to-deal-with" choice
+                    -- TODO: is this desirable? do we throw lexer error at all?
+                    try
+                        ( choice
+                            -- Sorted in
+                            -- - longest to shortest parse path
+                            -- - highest frequency to lowest frequency (for performance?)
+                            -- - more exact to more freeform (the latter can be the former but not vice versa)
+                            [ spaceToken
+                            , newlineToken
+                            , -- starts with "\"
+                              try mathMultiline
+                            , try mathInline
+                            , escape
+                            , headers
+                            , labeledLink
+                            , link
+                            , anchor
+                            , numericEntity
+                            , textElement
+                            , try module_
+                            , quotes
+                            , birdTrack
+                            ]
+                        )
+                        <|> other
                 rest <- go
                 pure (toks <> rest)
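
The comments in the hunk above describe the trade-off: an alternative that is not wrapped in try commits as soon as it consumes input, so a failure surfaces from the parser that actually matched the prefix rather than being retried against every remaining alternative, while the outer try ... <|> other still keeps the old catch-all fallback. Below is a minimal standalone sketch of that behaviour, assuming megaparsec; the names labeledLink and plainText are illustrative stand-ins, not this repository's definitions.

-- Standalone sketch, assuming megaparsec; not this repository's lexer.
module TrySketch where

import Data.Void (Void)
import Text.Megaparsec
import Text.Megaparsec.Char (char)

type P = Parsec Void String

labeledLink :: P (String, String)
labeledLink = do
    _ <- char '['
    label <- manyTill anySingle (char ']')
    _ <- char '('
    url <- manyTill anySingle (char ')')
    pure (label, url)

plainText :: P String
plainText = some anySingle

-- 'try' on every alternative: a malformed "[x](y" backtracks silently
-- and is swallowed by the catch-all, so no error is ever reported.
lexAllTry :: P (Either (String, String) String)
lexAllTry = try (Left <$> labeledLink) <|> (Right <$> plainText)

-- No 'try': once '[' is consumed the lexer commits to labeledLink, and
-- the failure is reported where the closing ')' was expected.
lexCommitted :: P (Either (String, String) String)
lexCommitted = (Left <$> labeledLink) <|> (Right <$> plainText)

-- ghci> parseTest lexAllTry    "[x](y"   -- Right "[x](y"
-- ghci> parseTest lexCommitted "[x](y"   -- error mentioning the missing ')'

With lexAllTry the malformed link quietly degrades to plain text; with lexCommitted the same input is rejected with an error at the end of the input that mentions the missing ')', which is the kind of message the commit title is after.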
@@ -239,6 +247,7 @@ mathMultiline = delimited "\\[" "\\]" MathMultilineOpen MathMultilineClose
 mathInline :: Lexer
 mathInline = delimited "\\(" "\\)" MathInlineOpen MathInlineClose
 
+-- TODO: make sure this starts at column 0?
 birdTrack :: Lexer
 birdTrack = delimitedNoTrailing ">> " eol BirdTrack
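
One possible way to act on the column-0 TODO above, assuming megaparsec (whose columns are 1-based, so "column 0" corresponds to pos1); atLineStart and its error text are illustrative and not part of this repository.

-- Sketch of a line-start guard, assuming megaparsec.
module ColumnSketch where

import Control.Monad (unless)
import Data.Void (Void)
import Text.Megaparsec

type P = Parsec Void String

-- Succeeds only when the parser is at the first column of a line.
-- It consumes no input, so it can sit in front of the ">> " open
-- delimiter without affecting backtracking.
atLineStart :: P ()
atLineStart = do
    col <- sourceColumn <$> getSourcePos
    unless (col == pos1) $
        fail "bird track must start at the beginning of a line"

Where the check belongs (inside delimitedNoTrailing or just before it) depends on how that helper is written, which this sketch does not assume.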