diff --git a/src/Lexer.hs b/src/Lexer.hs index 77fc84a..0d46560 100644 --- a/src/Lexer.hs +++ b/src/Lexer.hs @@ -82,28 +82,43 @@ lexText = go Parsec.optionMaybe Parsec.eof >>= \case Just _ -> pure [] Nothing -> do - toks <- - choice $ - Parsec.try - <$> [ mathMultiline - , mathInline - , escape -- maths go before escape to avoid mismatch - , headers - , newlineToken - , spaceToken - , link - , labeledLink - , module_ - , anchor - , numericEntity - , textElement - , quotes - , birdTrack - , other - ] + toks <- topLevel rest <- go pure (toks <> rest) +{- FOURMOLU_DISABLE -} + topLevel = + -- backtracking here so we always have a chance to try "other", the "catch-all-leave-to-parser-to-deal-with" choice + -- TODO: is this desirable? do we throw lexer error at all? + try + ( choice + -- Sorted in + -- - longest to shortest parse path + -- - highest frequency to lowest frequency (for performance?) + -- - more exact to more freeform (the latter can be the former but not vice versa) + [ spaceToken + , newlineToken + + -- starts with "\" + , try mathMultiline + , try mathInline + + , try module_ + , quotes + , birdTrack + + , escape + , headers + , labeledLink + , link + , anchor + , numericEntity + , textElement + ] + ) + <|> other +{- FOURMOLU_ENABLE -} + -- Tokens textElement :: Parser [LocatedToken] @@ -239,6 +254,7 @@ mathMultiline = delimited "\\[" "\\]" MathMultilineOpen MathMultilineClose mathInline :: Lexer mathInline = delimited "\\(" "\\)" MathInlineOpen MathInlineClose +-- TODO: make sure this starts at column 0? birdTrack :: Lexer birdTrack = delimitedNoTrailing ">> " eol BirdTrack