diff --git a/src/Lexer.hs b/src/Lexer.hs index 3b04ecc..77fc84a 100644 --- a/src/Lexer.hs +++ b/src/Lexer.hs @@ -42,7 +42,6 @@ data Token | Escape | EmphasisOpen | EmphasisClose - | Expression | Header Level | MonospaceOpen | MonospaceClose @@ -83,44 +82,28 @@ lexText = go Parsec.optionMaybe Parsec.eof >>= \case Just _ -> pure [] Nothing -> do - toks <- topLevel + toks <- + choice $ + Parsec.try + <$> [ mathMultiline + , mathInline + , escape -- maths go before escape to avoid mismatch + , headers + , newlineToken + , spaceToken + , link + , labeledLink + , module_ + , anchor + , numericEntity + , textElement + , quotes + , birdTrack + , other + ] rest <- go pure (toks <> rest) -{- FOURMOLU_DISABLE -} - topLevel = - -- backtracking here so we always have a chance to try "other", the "catch-all-leave-to-parser-to-deal-with" choice - -- TODO: is this desirable? do we throw lexer error at all? - try - ( choice - -- Sorted in - -- - longest to shortest parse path - -- - highest frequency to lowest frequency (for performance?) - -- - more exact to more freeform (the latter can be the former but not vice versa) - [ spaceToken - , newlineToken - - , try module_ - , quotes - , try expression - , birdTrack - - -- starts with "\" - , try mathMultiline - , try mathInline - , escape - - , headers - , labeledLink - , link - , anchor - , numericEntity - , textElement - ] - ) - <|> other -{- FOURMOLU_ENABLE -} - -- Tokens textElement :: Parser [LocatedToken] @@ -160,26 +143,16 @@ delimited openP closeP openTok closeTok = asList <$> delimitedAsTuple (openTok < asList (a, tok, b) = [a, tok, b] delimitedNoTrailing :: Parser open -> Parser close -> Token -> Parser [LocatedToken] -delimitedNoTrailing openP closeP openTok = - asList <$> delimitedAsTuple (openTok <$ openP) (void closeP) +delimitedNoTrailing openP closeP openTok = asList <$> delimitedAsTuple (openTok <$ openP) (void closeP) where asList (a, tok, _) = [a, tok] delimitedSymmetric :: Parser a -> Token -> Token -> Parser [LocatedToken] delimitedSymmetric s = delimited s s ---- End of line // end of file eol :: Parser () eol = void "\n" <|> void "\r\n" <|> Parsec.eof --- Start of line // start of file -sol :: Parser () -sol = do - p <- getPosition - if sourceColumn p == 1 - then pure () - else fail "Not at start of line/document" - header1 :: Lexer header1 = delimitedNoTrailing "= " eol (Header One) @@ -266,15 +239,8 @@ mathMultiline = delimited "\\[" "\\]" MathMultilineOpen MathMultilineClose mathInline :: Lexer mathInline = delimited "\\(" "\\)" MathInlineOpen MathInlineClose --- TODO: make sure this starts at column 0? birdTrack :: Lexer -birdTrack = delimitedNoTrailing (sol <* "> ") eol BirdTrack - --- TODO: also match following lines iff: --- they start with alphanum --- they're not empty -expression :: Lexer -expression = delimitedNoTrailing (sol <* ">>> ") eol Expression +birdTrack = delimitedNoTrailing ">> " eol BirdTrack escape :: Lexer escape = delimitedNoTrailing "\\" eol Escape diff --git a/test/Spec.hs b/test/Spec.hs index 3db44f1..2040e2f 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -18,7 +18,7 @@ main = hspec $ do describe "minimal" do it "handles unicode" unicode it "escapes" escaping - it "maths" maths + it "maths" math it "anchors" anchor it "space chars" space it "bare string" someString @@ -29,7 +29,6 @@ main = hspec $ do it "bird tracks" birdTracks it "module names" modules it "quotes" quotes - it "expressions" expressions it "numeric entity" numericEntity it "ignores nesting" ignoreNesting @@ -92,8 +91,8 @@ anchor = `shouldLexTo` [ (1, 1, Anchor "myAnchor") ] -maths :: IO () -maths = do +math :: IO () +math = do "\\[some math\\]" `shouldLexTo` [ (1, 1, MathMultilineOpen) , (1, 3, Token "some math") @@ -129,35 +128,10 @@ ignoreNesting = ] birdTracks :: Expectation -birdTracks = do - "> code line" +birdTracks = + ">> code" `shouldLexTo` [ (1, 1, BirdTrack) - , (1, 3, Token "code line") - ] - " > not code" - `shouldLexTo` [ (1, 1, Space) - , (1, 2, Token ">") - , (1, 3, Space) - , (1, 4, Token "not") - , (1, 7, Space) - , (1, 8, Token "code") - ] - -expressions :: Expectation -expressions = do - ">>> eval this" - `shouldLexTo` [ (1, 1, Expression) - , (1, 5, Token "eval this") - ] - " >>> not eval this" - `shouldLexTo` [ (1, 1, Space) - , (1, 2, Token ">>>") - , (1, 5, Space) - , (1, 6, Token "not") - , (1, 9, Space) - , (1, 10, Token "eval") - , (1, 14, Space) - , (1, 15, Token "this") + , (1, 4, Token "code") ] quotes :: Expectation