diff --git a/src/Lexer.hs b/src/Lexer.hs index 77fc84a..c2da642 100644 --- a/src/Lexer.hs +++ b/src/Lexer.hs @@ -7,7 +7,7 @@ module Lexer ( ) where -import Control.Monad (mfilter, void) +import Control.Monad (guard, mfilter, void) import Data.Char (ord, toLower) import Data.Functor (($>)) import Data.Text (Text, intercalate) @@ -42,6 +42,7 @@ data Token | Escape | EmphasisOpen | EmphasisClose + | Expression | Header Level | MonospaceOpen | MonospaceClose @@ -82,28 +83,44 @@ lexText = go Parsec.optionMaybe Parsec.eof >>= \case Just _ -> pure [] Nothing -> do - toks <- - choice $ - Parsec.try - <$> [ mathMultiline - , mathInline - , escape -- maths go before escape to avoid mismatch - , headers - , newlineToken - , spaceToken - , link - , labeledLink - , module_ - , anchor - , numericEntity - , textElement - , quotes - , birdTrack - , other - ] + toks <- topLevel rest <- go pure (toks <> rest) + topLevel = do + -- check for start-of-line markup first + lineStart <- + optionMaybe $ + choice + [ try expression + , try birdTrack + , headers + ] + + case lineStart of + Just toks -> pure toks + Nothing -> + choice $ + -- Sorted in + -- - longest to shortest parse path + -- - highest frequency to lowest frequency (for performance?) + -- - more exact to more freeform (the latter can be the former but not vice versa) + [ spaceToken + , newlineToken + , try module_ + , quotes + , -- starts with "\" + try mathMultiline + , try mathInline + , escape + , labeledLink + , link + , anchor + , numericEntity + , textElement + , other + ] + -- Tokens textElement :: Parser [LocatedToken] @@ -143,16 +160,22 @@ delimited openP closeP openTok closeTok = asList <$> delimitedAsTuple (openTok < asList (a, tok, b) = [a, tok, b] delimitedNoTrailing :: Parser open -> Parser close -> Token -> Parser [LocatedToken] -delimitedNoTrailing openP closeP openTok = asList <$> delimitedAsTuple (openTok <$ openP) (void closeP) +delimitedNoTrailing openP closeP openTok = + asList <$> delimitedAsTuple (openTok <$ openP) (void closeP) where asList (a, tok, _) = [a, tok] delimitedSymmetric :: Parser a -> Token -> Token -> Parser [LocatedToken] delimitedSymmetric s = delimited s s +--- End of line // end of file eol :: Parser () eol = void "\n" <|> void "\r\n" <|> Parsec.eof +-- Start of line // start of file +sol :: Parser () +sol = getPosition >>= guard . (== 1) . sourceColumn + header1 :: Lexer header1 = delimitedNoTrailing "= " eol (Header One) @@ -239,8 +262,15 @@ mathMultiline = delimited "\\[" "\\]" MathMultilineOpen MathMultilineClose mathInline :: Lexer mathInline = delimited "\\(" "\\)" MathInlineOpen MathInlineClose +-- TODO: make sure this starts at column 0? birdTrack :: Lexer -birdTrack = delimitedNoTrailing ">> " eol BirdTrack +birdTrack = delimitedNoTrailing (sol <* "> ") eol BirdTrack + +-- TODO: also match following lines iff: +-- they start with alphanum +-- they're not empty +expression :: Lexer +expression = delimitedNoTrailing (sol <* ">>> ") eol Expression escape :: Lexer escape = delimitedNoTrailing "\\" eol Escape diff --git a/test/Spec.hs b/test/Spec.hs index 2040e2f..3db44f1 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -18,7 +18,7 @@ main = hspec $ do describe "minimal" do it "handles unicode" unicode it "escapes" escaping - it "maths" math + it "maths" maths it "anchors" anchor it "space chars" space it "bare string" someString @@ -29,6 +29,7 @@ main = hspec $ do it "bird tracks" birdTracks it "module names" modules it "quotes" quotes + it "expressions" expressions it "numeric entity" numericEntity it "ignores nesting" ignoreNesting @@ -91,8 +92,8 @@ anchor = `shouldLexTo` [ (1, 1, Anchor "myAnchor") ] -math :: IO () -math = do +maths :: IO () +maths = do "\\[some math\\]" `shouldLexTo` [ (1, 1, MathMultilineOpen) , (1, 3, Token "some math") @@ -128,10 +129,35 @@ ignoreNesting = ] birdTracks :: Expectation -birdTracks = - ">> code" +birdTracks = do + "> code line" `shouldLexTo` [ (1, 1, BirdTrack) - , (1, 4, Token "code") + , (1, 3, Token "code line") + ] + " > not code" + `shouldLexTo` [ (1, 1, Space) + , (1, 2, Token ">") + , (1, 3, Space) + , (1, 4, Token "not") + , (1, 7, Space) + , (1, 8, Token "code") + ] + +expressions :: Expectation +expressions = do + ">>> eval this" + `shouldLexTo` [ (1, 1, Expression) + , (1, 5, Token "eval this") + ] + " >>> not eval this" + `shouldLexTo` [ (1, 1, Space) + , (1, 2, Token ">>>") + , (1, 5, Space) + , (1, 6, Token "not") + , (1, 9, Space) + , (1, 10, Token "eval") + , (1, 14, Space) + , (1, 15, Token "this") ] quotes :: Expectation