Compare commits

...
Sign in to create a new pull request.

3 commits

Author SHA1 Message Date
3b5c904239 Trying something to get rid of so much try/choice.
Some checks failed
Haskell CI / build (pull_request) Successful in 2m49s
Haskell CI / test (pull_request) Successful in 2m28s
Haskell CI / fourmolu (pull_request) Successful in 7s
Haskell CI / hlint (pull_request) Failing after 5s
2025-10-05 11:42:47 +00:00
eb431a9c97 Added expression eval; adjusted birdtrack, added sol combinator. 2025-10-05 11:42:47 +00:00
6d4a941178 ref(lexer): attempt to not try on every token
...for a better error message and better perf
2025-10-05 11:42:47 +00:00
2 changed files with 84 additions and 28 deletions

View file

@ -7,7 +7,7 @@ module Lexer (
)
where
import Control.Monad (mfilter, void)
import Control.Monad (guard, mfilter, void)
import Data.Char (ord, toLower)
import Data.Functor (($>))
import Data.Text (Text, intercalate)
@ -42,6 +42,7 @@ data Token
| Escape
| EmphasisOpen
| EmphasisClose
| Expression
| Header Level
| MonospaceOpen
| MonospaceClose
@ -82,27 +83,43 @@ lexText = go
Parsec.optionMaybe Parsec.eof >>= \case
Just _ -> pure []
Nothing -> do
toks <-
choice $
Parsec.try
<$> [ mathMultiline
, mathInline
, escape -- maths go before escape to avoid mismatch
toks <- topLevel
rest <- go
pure (toks <> rest)
topLevel = do
-- check for start-of-line markup first
lineStart <-
optionMaybe $
choice
[ try expression
, try birdTrack
, headers
]
case lineStart of
Just toks -> pure toks
Nothing ->
choice $
-- Sorted in
-- - longest to shortest parse path
-- - highest frequency to lowest frequency (for performance?)
-- - more exact to more freeform (the latter can be the former but not vice versa)
[ spaceToken
, newlineToken
, spaceToken
, link
, try module_
, quotes
, -- starts with "\"
try mathMultiline
, try mathInline
, escape
, labeledLink
, module_
, link
, anchor
, numericEntity
, textElement
, quotes
, birdTrack
, other
]
rest <- go
pure (toks <> rest)
-- Tokens
@ -143,16 +160,22 @@ delimited openP closeP openTok closeTok = asList <$> delimitedAsTuple (openTok <
asList (a, tok, b) = [a, tok, b]
delimitedNoTrailing :: Parser open -> Parser close -> Token -> Parser [LocatedToken]
delimitedNoTrailing openP closeP openTok = asList <$> delimitedAsTuple (openTok <$ openP) (void closeP)
delimitedNoTrailing openP closeP openTok =
asList <$> delimitedAsTuple (openTok <$ openP) (void closeP)
where
asList (a, tok, _) = [a, tok]
delimitedSymmetric :: Parser a -> Token -> Token -> Parser [LocatedToken]
delimitedSymmetric s = delimited s s
--- End of line // end of file
eol :: Parser ()
eol = void "\n" <|> void "\r\n" <|> Parsec.eof
-- Start of line // start of file
sol :: Parser ()
sol = getPosition >>= guard . (== 1) . sourceColumn
header1 :: Lexer
header1 = delimitedNoTrailing "= " eol (Header One)
@ -239,8 +262,15 @@ mathMultiline = delimited "\\[" "\\]" MathMultilineOpen MathMultilineClose
mathInline :: Lexer
mathInline = delimited "\\(" "\\)" MathInlineOpen MathInlineClose
-- TODO: make sure this starts at column 0?
birdTrack :: Lexer
birdTrack = delimitedNoTrailing ">> " eol BirdTrack
birdTrack = delimitedNoTrailing (sol <* "> ") eol BirdTrack
-- TODO: also match following lines iff:
-- they start with alphanum
-- they're not empty
expression :: Lexer
expression = delimitedNoTrailing (sol <* ">>> ") eol Expression
escape :: Lexer
escape = delimitedNoTrailing "\\" eol Escape

View file

@ -18,7 +18,7 @@ main = hspec $ do
describe "minimal" do
it "handles unicode" unicode
it "escapes" escaping
it "maths" math
it "maths" maths
it "anchors" anchor
it "space chars" space
it "bare string" someString
@ -29,6 +29,7 @@ main = hspec $ do
it "bird tracks" birdTracks
it "module names" modules
it "quotes" quotes
it "expressions" expressions
it "numeric entity" numericEntity
it "ignores nesting" ignoreNesting
@ -91,8 +92,8 @@ anchor =
`shouldLexTo` [ (1, 1, Anchor "myAnchor")
]
math :: IO ()
math = do
maths :: IO ()
maths = do
"\\[some math\\]"
`shouldLexTo` [ (1, 1, MathMultilineOpen)
, (1, 3, Token "some math")
@ -128,10 +129,35 @@ ignoreNesting =
]
birdTracks :: Expectation
birdTracks =
">> code"
birdTracks = do
"> code line"
`shouldLexTo` [ (1, 1, BirdTrack)
, (1, 4, Token "code")
, (1, 3, Token "code line")
]
" > not code"
`shouldLexTo` [ (1, 1, Space)
, (1, 2, Token ">")
, (1, 3, Space)
, (1, 4, Token "not")
, (1, 7, Space)
, (1, 8, Token "code")
]
expressions :: Expectation
expressions = do
">>> eval this"
`shouldLexTo` [ (1, 1, Expression)
, (1, 5, Token "eval this")
]
" >>> not eval this"
`shouldLexTo` [ (1, 1, Space)
, (1, 2, Token ">>>")
, (1, 5, Space)
, (1, 6, Token "not")
, (1, 9, Space)
, (1, 10, Token "eval")
, (1, 14, Space)
, (1, 15, Token "this")
]
quotes :: Expectation