Compare commits

...

3 commits

Author SHA1 Message Date
a64ac93bd9 Added expression eval; adjusted birdtrack, added sol combinator.
All checks were successful
Haskell CI / build (pull_request) Successful in 2m51s
Haskell CI / test (pull_request) Successful in 2m40s
Haskell CI / fourmolu (pull_request) Successful in 6s
Haskell CI / hlint (pull_request) Successful in 6s
2025-10-05 15:20:28 +00:00
f66bc41ae2 ref(lexer): attempt to not try on every token
...for a better error message and better perf
2025-10-05 15:20:28 +00:00
1664694134 chore: add hls (#10)
Reviewed-on: #10
Co-authored-by: Igor Ranieri <igor@elland.me>
Co-committed-by: Igor Ranieri <igor@elland.me>
2025-10-05 13:18:49 +00:00
3 changed files with 88 additions and 27 deletions

View file

@ -12,6 +12,7 @@ pkgs.mkShell rec {
with pkgs;
[
haskell.packages.ghc912.ghc
haskell.packages.ghc912.haskell-language-server
zlib
]
++ map haskell.lib.justStaticExecutables [

View file

@ -42,6 +42,7 @@ data Token
| Escape
| EmphasisOpen
| EmphasisClose
| Expression
| Header Level
| MonospaceOpen
| MonospaceClose
@ -82,28 +83,44 @@ lexText = go
Parsec.optionMaybe Parsec.eof >>= \case
Just _ -> pure []
Nothing -> do
toks <-
choice $
Parsec.try
<$> [ mathMultiline
, mathInline
, escape -- maths go before escape to avoid mismatch
, headers
, newlineToken
, spaceToken
, link
, labeledLink
, module_
, anchor
, numericEntity
, textElement
, quotes
, birdTrack
, other
]
toks <- topLevel
rest <- go
pure (toks <> rest)
{- FOURMOLU_DISABLE -}
topLevel =
-- backtracking here so we always have a chance to try "other", the "catch-all-leave-to-parser-to-deal-with" choice
-- TODO: is this desirable? do we throw lexer error at all?
try
( choice
-- Sorted in
-- - longest to shortest parse path
-- - highest frequency to lowest frequency (for performance?)
-- - more exact to more freeform (the latter can be the former but not vice versa)
[ spaceToken
, newlineToken
, try module_
, quotes
, try expression
, birdTrack
-- starts with "\"
, try mathMultiline
, try mathInline
, escape
, headers
, labeledLink
, link
, anchor
, numericEntity
, textElement
]
)
<|> other
{- FOURMOLU_ENABLE -}
-- Tokens
textElement :: Parser [LocatedToken]
@ -143,16 +160,26 @@ delimited openP closeP openTok closeTok = asList <$> delimitedAsTuple (openTok <
asList (a, tok, b) = [a, tok, b]
delimitedNoTrailing :: Parser open -> Parser close -> Token -> Parser [LocatedToken]
delimitedNoTrailing openP closeP openTok = asList <$> delimitedAsTuple (openTok <$ openP) (void closeP)
delimitedNoTrailing openP closeP openTok =
asList <$> delimitedAsTuple (openTok <$ openP) (void closeP)
where
asList (a, tok, _) = [a, tok]
delimitedSymmetric :: Parser a -> Token -> Token -> Parser [LocatedToken]
delimitedSymmetric s = delimited s s
--- End of line // end of file
eol :: Parser ()
eol = void "\n" <|> void "\r\n" <|> Parsec.eof
-- Start of line // start of file
sol :: Parser ()
sol = do
p <- getPosition
if sourceColumn p == 1
then pure ()
else fail "Not at start of line/document"
header1 :: Lexer
header1 = delimitedNoTrailing "= " eol (Header One)
@ -239,8 +266,15 @@ mathMultiline = delimited "\\[" "\\]" MathMultilineOpen MathMultilineClose
mathInline :: Lexer
mathInline = delimited "\\(" "\\)" MathInlineOpen MathInlineClose
-- TODO: make sure this starts at column 0?
birdTrack :: Lexer
birdTrack = delimitedNoTrailing ">> " eol BirdTrack
birdTrack = delimitedNoTrailing (sol <* "> ") eol BirdTrack
-- TODO: also match following lines iff:
-- they start with alphanum
-- they're not empty
expression :: Lexer
expression = delimitedNoTrailing (sol <* ">>> ") eol Expression
escape :: Lexer
escape = delimitedNoTrailing "\\" eol Escape

View file

@ -18,7 +18,7 @@ main = hspec $ do
describe "minimal" do
it "handles unicode" unicode
it "escapes" escaping
it "maths" math
it "maths" maths
it "anchors" anchor
it "space chars" space
it "bare string" someString
@ -29,6 +29,7 @@ main = hspec $ do
it "bird tracks" birdTracks
it "module names" modules
it "quotes" quotes
it "expressions" expressions
it "numeric entity" numericEntity
it "ignores nesting" ignoreNesting
@ -91,8 +92,8 @@ anchor =
`shouldLexTo` [ (1, 1, Anchor "myAnchor")
]
math :: IO ()
math = do
maths :: IO ()
maths = do
"\\[some math\\]"
`shouldLexTo` [ (1, 1, MathMultilineOpen)
, (1, 3, Token "some math")
@ -128,10 +129,35 @@ ignoreNesting =
]
birdTracks :: Expectation
birdTracks =
">> code"
birdTracks = do
"> code line"
`shouldLexTo` [ (1, 1, BirdTrack)
, (1, 4, Token "code")
, (1, 3, Token "code line")
]
" > not code"
`shouldLexTo` [ (1, 1, Space)
, (1, 2, Token ">")
, (1, 3, Space)
, (1, 4, Token "not")
, (1, 7, Space)
, (1, 8, Token "code")
]
expressions :: Expectation
expressions = do
">>> eval this"
`shouldLexTo` [ (1, 1, Expression)
, (1, 5, Token "eval this")
]
" >>> not eval this"
`shouldLexTo` [ (1, 1, Space)
, (1, 2, Token ">>>")
, (1, 5, Space)
, (1, 6, Token "not")
, (1, 9, Space)
, (1, 10, Token "eval")
, (1, 14, Space)
, (1, 15, Token "this")
]
quotes :: Expectation