WIP: ref(lexer): attempt to not try on every token #7

Closed
primrose wants to merge 2 commits from reduce-try-usage into dev
2 changed files with 87 additions and 27 deletions


@@ -42,6 +42,7 @@ data Token
   | Escape
   | EmphasisOpen
   | EmphasisClose
+  | Expression
   | Header Level
   | MonospaceOpen
   | MonospaceClose
@@ -82,28 +83,44 @@ lexText = go
       Parsec.optionMaybe Parsec.eof >>= \case
         Just _ -> pure []
         Nothing -> do
-          toks <-
-            choice $
-              Parsec.try
-                <$> [ mathMultiline
-                    , mathInline
-                    , escape -- maths go before escape to avoid mismatch
-                    , headers
-                    , newlineToken
-                    , spaceToken
-                    , link
-                    , labeledLink
-                    , module_
-                    , anchor
-                    , numericEntity
-                    , textElement
-                    , quotes
-                    , birdTrack
-                    , other
-                    ]
+          toks <- topLevel
           rest <- go
           pure (toks <> rest)
+
+    {- FOURMOLU_DISABLE -}
+    topLevel =
+      -- Backtracking here so we always get a chance to fall through to "other",
+      -- the catch-all "leave it to the parser to deal with" choice.
+      -- TODO: is this desirable? do we throw lexer errors at all?
+      try
+        ( choice
+            -- Sorted by:
+            -- - longest to shortest parse path
+            -- - highest frequency to lowest frequency (for performance?)
+            -- - more exact to more freeform (the latter can match what the former matches, but not vice versa)
+            [ spaceToken
+            , newlineToken
+            , try module_
+            , quotes
+            , try expression
+            , birdTrack
+            -- starts with "\"
+            , try mathMultiline
+            , try mathInline
+            , escape
+            , headers
+            , labeledLink
+            , link
+            , anchor
+            , numericEntity
+            , textElement
+            ]
+        )
+        <|> other
+    {- FOURMOLU_ENABLE -}

 -- Tokens

 textElement :: Parser [LocatedToken]
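
Note (not part of the diff): the single outer `try` around `choice`, the `<|> other` fallback, and the per-branch `try`s on `module_`, `expression`, `mathMultiline`, and `mathInline` all rely on Parsec's consumed-input rule: `<|>` only attempts its right-hand side when the left-hand side failed without consuming input, and `try` rewinds whatever a failed branch had consumed. A minimal standalone sketch of that rule, using plain Text.Parsec over String rather than the project's Parser/Lexer types (the names below are illustrative only):

  import Text.Parsec
  import Text.Parsec.String (Parser)

  -- ">>> " and "> " share a prefix, mirroring `expression` vs `birdTrack`.
  expr, bird :: Parser String
  expr = string ">>> " >> many1 (noneOf "\n")
  bird = string "> " >> many1 (noneOf "\n")

  -- Fails on "> code": `string ">>> "` consumes the first '>' before failing,
  -- so `<|>` never even tries `bird`.
  naive :: Parser String
  naive = expr <|> bird

  -- Succeeds on "> code": `try` rewinds the consumed '>' on failure, so the
  -- next alternative (and, in the lexer, the catch-all `other`) still runs.
  backtracking :: Parser String
  backtracking = try expr <|> bird

  -- ghci> parse naive "" "> code"          -- Left (parse error)
  -- ghci> parse backtracking "" "> code"   -- Right "code"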
@@ -143,16 +160,26 @@ delimited openP closeP openTok closeTok = asList <$> delimitedAsTuple (openTok <
     asList (a, tok, b) = [a, tok, b]

 delimitedNoTrailing :: Parser open -> Parser close -> Token -> Parser [LocatedToken]
-delimitedNoTrailing openP closeP openTok = asList <$> delimitedAsTuple (openTok <$ openP) (void closeP)
+delimitedNoTrailing openP closeP openTok =
+  asList <$> delimitedAsTuple (openTok <$ openP) (void closeP)
   where
     asList (a, tok, _) = [a, tok]

 delimitedSymmetric :: Parser a -> Token -> Token -> Parser [LocatedToken]
 delimitedSymmetric s = delimited s s

+-- End of line // end of file
 eol :: Parser ()
 eol = void "\n" <|> void "\r\n" <|> Parsec.eof
+
+-- Start of line // start of file
+sol :: Parser ()
+sol = do
+  p <- getPosition
+  if sourceColumn p == 1
+    then pure ()
+    else fail "Not at start of line/document"

 header1 :: Lexer
 header1 = delimitedNoTrailing "= " eol (Header One)
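
Note (not part of the diff): `sol` only succeeds when the current source column is 1, which is what lets the reworked `birdTrack` (and the new `expression`) below match only at the start of a line. A standalone sketch of the same guard, again over plain Text.Parsec and String (names are illustrative only):

  import Text.Parsec
  import Text.Parsec.String (Parser)

  -- Succeed only at column 1, i.e. the start of a line or of the input.
  startOfLine :: Parser ()
  startOfLine = do
    p <- getPosition
    if sourceColumn p == 1
      then pure ()
      else fail "not at start of line"

  -- Matches "> " only as the very first thing on a line, like the new birdTrack.
  birdTrackOpen :: Parser ()
  birdTrackOpen = startOfLine <* string "> "

  -- ghci> parse birdTrackOpen "" "> code"               -- Right ()
  -- ghci> parse (spaces *> birdTrackOpen) "" "  > code" -- Left (column is 3)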
@@ -239,8 +266,15 @@ mathMultiline = delimited "\\[" "\\]" MathMultilineOpen MathMultilineClose
 mathInline :: Lexer
 mathInline = delimited "\\(" "\\)" MathInlineOpen MathInlineClose

--- TODO: make sure this starts at column 0?
 birdTrack :: Lexer
-birdTrack = delimitedNoTrailing ">> " eol BirdTrack
+birdTrack = delimitedNoTrailing (sol <* "> ") eol BirdTrack
+
+-- TODO: also match following lines iff:
+--   they start with alphanum
+--   they're not empty
+expression :: Lexer
+expression = delimitedNoTrailing (sol <* ">>> ") eol Expression

 escape :: Lexer
 escape = delimitedNoTrailing "\\" eol Escape


@@ -18,7 +18,7 @@ main = hspec $ do
   describe "minimal" do
     it "handles unicode" unicode
     it "escapes" escaping
-    it "maths" math
+    it "maths" maths
     it "anchors" anchor
     it "space chars" space
     it "bare string" someString

@@ -29,6 +29,7 @@ main = hspec $ do
     it "bird tracks" birdTracks
     it "module names" modules
     it "quotes" quotes
+    it "expressions" expressions
     it "numeric entity" numericEntity
     it "ignores nesting" ignoreNesting

@@ -91,8 +92,8 @@ anchor =
     `shouldLexTo` [ (1, 1, Anchor "myAnchor")
                   ]

-math :: IO ()
-math = do
+maths :: IO ()
+maths = do
   "\\[some math\\]"
     `shouldLexTo` [ (1, 1, MathMultilineOpen)
                   , (1, 3, Token "some math")

@@ -128,10 +129,35 @@ ignoreNesting =
                   ]

 birdTracks :: Expectation
-birdTracks =
-  ">> code"
+birdTracks = do
+  "> code line"
     `shouldLexTo` [ (1, 1, BirdTrack)
-                  , (1, 4, Token "code")
+                  , (1, 3, Token "code line")
+                  ]
+  " > not code"
+    `shouldLexTo` [ (1, 1, Space)
+                  , (1, 2, Token ">")
+                  , (1, 3, Space)
+                  , (1, 4, Token "not")
+                  , (1, 7, Space)
+                  , (1, 8, Token "code")
+                  ]
+
+expressions :: Expectation
+expressions = do
+  ">>> eval this"
+    `shouldLexTo` [ (1, 1, Expression)
+                  , (1, 5, Token "eval this")
+                  ]
+  " >>> not eval this"
+    `shouldLexTo` [ (1, 1, Space)
+                  , (1, 2, Token ">>>")
+                  , (1, 5, Space)
+                  , (1, 6, Token "not")
+                  , (1, 9, Space)
+                  , (1, 10, Token "eval")
+                  , (1, 14, Space)
+                  , (1, 15, Token "this")
                   ]

 quotes :: Expectation