Trying something to get rid of so much try/choice. #8
2 changed files with 84 additions and 28 deletions
74
src/Lexer.hs
74
src/Lexer.hs
|
|
@ -7,7 +7,7 @@ module Lexer (
|
|||
)
|
||||
where
|
||||
|
||||
import Control.Monad (mfilter, void)
|
||||
import Control.Monad (guard, mfilter, void)
|
||||
import Data.Char (ord, toLower)
|
||||
import Data.Functor (($>))
|
||||
import Data.Text (Text, intercalate)
|
||||
|
|
@ -42,6 +42,7 @@ data Token
|
|||
| Escape
|
||||
| EmphasisOpen
|
||||
| EmphasisClose
|
||||
| Expression
|
||||
| Header Level
|
||||
| MonospaceOpen
|
||||
| MonospaceClose
|
||||
|
|
@ -82,28 +83,44 @@ lexText = go
|
|||
Parsec.optionMaybe Parsec.eof >>= \case
|
||||
Just _ -> pure []
|
||||
Nothing -> do
|
||||
toks <-
|
||||
choice $
|
||||
Parsec.try
|
||||
<$> [ mathMultiline
|
||||
, mathInline
|
||||
, escape -- maths go before escape to avoid mismatch
|
||||
, headers
|
||||
, newlineToken
|
||||
, spaceToken
|
||||
, link
|
||||
, labeledLink
|
||||
, module_
|
||||
, anchor
|
||||
, numericEntity
|
||||
, textElement
|
||||
, quotes
|
||||
, birdTrack
|
||||
, other
|
||||
]
|
||||
toks <- topLevel
|
||||
rest <- go
|
||||
pure (toks <> rest)
|
||||
|
||||
topLevel = do
|
||||
-- check for start-of-line markup first
|
||||
lineStart <-
|
||||
optionMaybe $
|
||||
choice
|
||||
[ try expression
|
||||
, try birdTrack
|
||||
, headers
|
||||
]
|
||||
|
||||
case lineStart of
|
||||
Just toks -> pure toks
|
||||
Nothing ->
|
||||
choice $
|
||||
-- Sorted in
|
||||
-- - longest to shortest parse path
|
||||
-- - highest frequency to lowest frequency (for performance?)
|
||||
-- - more exact to more freeform (the latter can be the former but not vice versa)
|
||||
[ spaceToken
|
||||
, newlineToken
|
||||
, try module_
|
||||
, quotes
|
||||
, -- starts with "\"
|
||||
try mathMultiline
|
||||
, try mathInline
|
||||
, escape
|
||||
, labeledLink
|
||||
, link
|
||||
, anchor
|
||||
, numericEntity
|
||||
, textElement
|
||||
, other
|
||||
]
|
||||
|
||||
-- Tokens
|
||||
|
||||
textElement :: Parser [LocatedToken]
|
||||
|
|
@ -143,16 +160,22 @@ delimited openP closeP openTok closeTok = asList <$> delimitedAsTuple (openTok <
|
|||
asList (a, tok, b) = [a, tok, b]
|
||||
|
||||
delimitedNoTrailing :: Parser open -> Parser close -> Token -> Parser [LocatedToken]
|
||||
delimitedNoTrailing openP closeP openTok = asList <$> delimitedAsTuple (openTok <$ openP) (void closeP)
|
||||
delimitedNoTrailing openP closeP openTok =
|
||||
asList <$> delimitedAsTuple (openTok <$ openP) (void closeP)
|
||||
where
|
||||
asList (a, tok, _) = [a, tok]
|
||||
|
||||
delimitedSymmetric :: Parser a -> Token -> Token -> Parser [LocatedToken]
|
||||
delimitedSymmetric s = delimited s s
|
||||
|
||||
--- End of line // end of file
|
||||
eol :: Parser ()
|
||||
eol = void "\n" <|> void "\r\n" <|> Parsec.eof
|
||||
|
||||
-- Start of line // start of file
|
||||
sol :: Parser ()
|
||||
sol = getPosition >>= guard . (== 1) . sourceColumn
|
||||
|
||||
header1 :: Lexer
|
||||
header1 = delimitedNoTrailing "= " eol (Header One)
|
||||
|
||||
|
|
@ -239,8 +262,15 @@ mathMultiline = delimited "\\[" "\\]" MathMultilineOpen MathMultilineClose
|
|||
mathInline :: Lexer
|
||||
mathInline = delimited "\\(" "\\)" MathInlineOpen MathInlineClose
|
||||
|
||||
-- TODO: make sure this starts at column 0?
|
||||
birdTrack :: Lexer
|
||||
birdTrack = delimitedNoTrailing ">> " eol BirdTrack
|
||||
birdTrack = delimitedNoTrailing (sol <* "> ") eol BirdTrack
|
||||
|
||||
-- TODO: also match following lines iff:
|
||||
-- they start with alphanum
|
||||
-- they're not empty
|
||||
expression :: Lexer
|
||||
expression = delimitedNoTrailing (sol <* ">>> ") eol Expression
|
||||
|
||||
escape :: Lexer
|
||||
escape = delimitedNoTrailing "\\" eol Escape
|
||||
|
|
|
|||
38
test/Spec.hs
38
test/Spec.hs
|
|
@ -18,7 +18,7 @@ main = hspec $ do
|
|||
describe "minimal" do
|
||||
it "handles unicode" unicode
|
||||
it "escapes" escaping
|
||||
it "maths" math
|
||||
it "maths" maths
|
||||
it "anchors" anchor
|
||||
it "space chars" space
|
||||
it "bare string" someString
|
||||
|
|
@ -29,6 +29,7 @@ main = hspec $ do
|
|||
it "bird tracks" birdTracks
|
||||
it "module names" modules
|
||||
it "quotes" quotes
|
||||
it "expressions" expressions
|
||||
it "numeric entity" numericEntity
|
||||
it "ignores nesting" ignoreNesting
|
||||
|
||||
|
|
@ -91,8 +92,8 @@ anchor =
|
|||
`shouldLexTo` [ (1, 1, Anchor "myAnchor")
|
||||
]
|
||||
|
||||
math :: IO ()
|
||||
math = do
|
||||
maths :: IO ()
|
||||
maths = do
|
||||
"\\[some math\\]"
|
||||
`shouldLexTo` [ (1, 1, MathMultilineOpen)
|
||||
, (1, 3, Token "some math")
|
||||
|
|
@ -128,10 +129,35 @@ ignoreNesting =
|
|||
]
|
||||
|
||||
birdTracks :: Expectation
|
||||
birdTracks =
|
||||
">> code"
|
||||
birdTracks = do
|
||||
"> code line"
|
||||
`shouldLexTo` [ (1, 1, BirdTrack)
|
||||
, (1, 4, Token "code")
|
||||
, (1, 3, Token "code line")
|
||||
]
|
||||
" > not code"
|
||||
`shouldLexTo` [ (1, 1, Space)
|
||||
, (1, 2, Token ">")
|
||||
, (1, 3, Space)
|
||||
, (1, 4, Token "not")
|
||||
, (1, 7, Space)
|
||||
, (1, 8, Token "code")
|
||||
]
|
||||
|
||||
expressions :: Expectation
|
||||
expressions = do
|
||||
">>> eval this"
|
||||
`shouldLexTo` [ (1, 1, Expression)
|
||||
, (1, 5, Token "eval this")
|
||||
]
|
||||
" >>> not eval this"
|
||||
`shouldLexTo` [ (1, 1, Space)
|
||||
, (1, 2, Token ">>>")
|
||||
, (1, 5, Space)
|
||||
, (1, 6, Token "not")
|
||||
, (1, 9, Space)
|
||||
, (1, 10, Token "eval")
|
||||
, (1, 14, Space)
|
||||
, (1, 15, Token "this")
|
||||
]
|
||||
|
||||
quotes :: Expectation
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue