Compare commits
3 commits
dev
...
tentative-
| Author | SHA1 | Date | |
|---|---|---|---|
| 3b5c904239 | |||
| eb431a9c97 | |||
| 6d4a941178 |
2 changed files with 84 additions and 28 deletions
74
src/Lexer.hs
74
src/Lexer.hs
|
|
@ -7,7 +7,7 @@ module Lexer (
|
||||||
)
|
)
|
||||||
where
|
where
|
||||||
|
|
||||||
import Control.Monad (mfilter, void)
|
import Control.Monad (guard, mfilter, void)
|
||||||
import Data.Char (ord, toLower)
|
import Data.Char (ord, toLower)
|
||||||
import Data.Functor (($>))
|
import Data.Functor (($>))
|
||||||
import Data.Text (Text, intercalate)
|
import Data.Text (Text, intercalate)
|
||||||
|
|
@ -42,6 +42,7 @@ data Token
|
||||||
| Escape
|
| Escape
|
||||||
| EmphasisOpen
|
| EmphasisOpen
|
||||||
| EmphasisClose
|
| EmphasisClose
|
||||||
|
| Expression
|
||||||
| Header Level
|
| Header Level
|
||||||
| MonospaceOpen
|
| MonospaceOpen
|
||||||
| MonospaceClose
|
| MonospaceClose
|
||||||
|
|
@ -82,28 +83,44 @@ lexText = go
|
||||||
Parsec.optionMaybe Parsec.eof >>= \case
|
Parsec.optionMaybe Parsec.eof >>= \case
|
||||||
Just _ -> pure []
|
Just _ -> pure []
|
||||||
Nothing -> do
|
Nothing -> do
|
||||||
toks <-
|
toks <- topLevel
|
||||||
choice $
|
|
||||||
Parsec.try
|
|
||||||
<$> [ mathMultiline
|
|
||||||
, mathInline
|
|
||||||
, escape -- maths go before escape to avoid mismatch
|
|
||||||
, headers
|
|
||||||
, newlineToken
|
|
||||||
, spaceToken
|
|
||||||
, link
|
|
||||||
, labeledLink
|
|
||||||
, module_
|
|
||||||
, anchor
|
|
||||||
, numericEntity
|
|
||||||
, textElement
|
|
||||||
, quotes
|
|
||||||
, birdTrack
|
|
||||||
, other
|
|
||||||
]
|
|
||||||
rest <- go
|
rest <- go
|
||||||
pure (toks <> rest)
|
pure (toks <> rest)
|
||||||
|
|
||||||
|
topLevel = do
|
||||||
|
-- check for start-of-line markup first
|
||||||
|
lineStart <-
|
||||||
|
optionMaybe $
|
||||||
|
choice
|
||||||
|
[ try expression
|
||||||
|
, try birdTrack
|
||||||
|
, headers
|
||||||
|
]
|
||||||
|
|
||||||
|
case lineStart of
|
||||||
|
Just toks -> pure toks
|
||||||
|
Nothing ->
|
||||||
|
choice $
|
||||||
|
-- Sorted in
|
||||||
|
-- - longest to shortest parse path
|
||||||
|
-- - highest frequency to lowest frequency (for performance?)
|
||||||
|
-- - more exact to more freeform (the latter can be the former but not vice versa)
|
||||||
|
[ spaceToken
|
||||||
|
, newlineToken
|
||||||
|
, try module_
|
||||||
|
, quotes
|
||||||
|
, -- starts with "\"
|
||||||
|
try mathMultiline
|
||||||
|
, try mathInline
|
||||||
|
, escape
|
||||||
|
, labeledLink
|
||||||
|
, link
|
||||||
|
, anchor
|
||||||
|
, numericEntity
|
||||||
|
, textElement
|
||||||
|
, other
|
||||||
|
]
|
||||||
|
|
||||||
-- Tokens
|
-- Tokens
|
||||||
|
|
||||||
textElement :: Parser [LocatedToken]
|
textElement :: Parser [LocatedToken]
|
||||||
|
|
@ -143,16 +160,22 @@ delimited openP closeP openTok closeTok = asList <$> delimitedAsTuple (openTok <
|
||||||
asList (a, tok, b) = [a, tok, b]
|
asList (a, tok, b) = [a, tok, b]
|
||||||
|
|
||||||
delimitedNoTrailing :: Parser open -> Parser close -> Token -> Parser [LocatedToken]
|
delimitedNoTrailing :: Parser open -> Parser close -> Token -> Parser [LocatedToken]
|
||||||
delimitedNoTrailing openP closeP openTok = asList <$> delimitedAsTuple (openTok <$ openP) (void closeP)
|
delimitedNoTrailing openP closeP openTok =
|
||||||
|
asList <$> delimitedAsTuple (openTok <$ openP) (void closeP)
|
||||||
where
|
where
|
||||||
asList (a, tok, _) = [a, tok]
|
asList (a, tok, _) = [a, tok]
|
||||||
|
|
||||||
delimitedSymmetric :: Parser a -> Token -> Token -> Parser [LocatedToken]
|
delimitedSymmetric :: Parser a -> Token -> Token -> Parser [LocatedToken]
|
||||||
delimitedSymmetric s = delimited s s
|
delimitedSymmetric s = delimited s s
|
||||||
|
|
||||||
|
--- End of line // end of file
|
||||||
eol :: Parser ()
|
eol :: Parser ()
|
||||||
eol = void "\n" <|> void "\r\n" <|> Parsec.eof
|
eol = void "\n" <|> void "\r\n" <|> Parsec.eof
|
||||||
|
|
||||||
|
-- Start of line // start of file
|
||||||
|
sol :: Parser ()
|
||||||
|
sol = getPosition >>= guard . (== 1) . sourceColumn
|
||||||
|
|
||||||
header1 :: Lexer
|
header1 :: Lexer
|
||||||
header1 = delimitedNoTrailing "= " eol (Header One)
|
header1 = delimitedNoTrailing "= " eol (Header One)
|
||||||
|
|
||||||
|
|
@ -239,8 +262,15 @@ mathMultiline = delimited "\\[" "\\]" MathMultilineOpen MathMultilineClose
|
||||||
mathInline :: Lexer
|
mathInline :: Lexer
|
||||||
mathInline = delimited "\\(" "\\)" MathInlineOpen MathInlineClose
|
mathInline = delimited "\\(" "\\)" MathInlineOpen MathInlineClose
|
||||||
|
|
||||||
|
-- TODO: make sure this starts at column 0?
|
||||||
birdTrack :: Lexer
|
birdTrack :: Lexer
|
||||||
birdTrack = delimitedNoTrailing ">> " eol BirdTrack
|
birdTrack = delimitedNoTrailing (sol <* "> ") eol BirdTrack
|
||||||
|
|
||||||
|
-- TODO: also match following lines iff:
|
||||||
|
-- they start with alphanum
|
||||||
|
-- they're not empty
|
||||||
|
expression :: Lexer
|
||||||
|
expression = delimitedNoTrailing (sol <* ">>> ") eol Expression
|
||||||
|
|
||||||
escape :: Lexer
|
escape :: Lexer
|
||||||
escape = delimitedNoTrailing "\\" eol Escape
|
escape = delimitedNoTrailing "\\" eol Escape
|
||||||
|
|
|
||||||
38
test/Spec.hs
38
test/Spec.hs
|
|
@ -18,7 +18,7 @@ main = hspec $ do
|
||||||
describe "minimal" do
|
describe "minimal" do
|
||||||
it "handles unicode" unicode
|
it "handles unicode" unicode
|
||||||
it "escapes" escaping
|
it "escapes" escaping
|
||||||
it "maths" math
|
it "maths" maths
|
||||||
it "anchors" anchor
|
it "anchors" anchor
|
||||||
it "space chars" space
|
it "space chars" space
|
||||||
it "bare string" someString
|
it "bare string" someString
|
||||||
|
|
@ -29,6 +29,7 @@ main = hspec $ do
|
||||||
it "bird tracks" birdTracks
|
it "bird tracks" birdTracks
|
||||||
it "module names" modules
|
it "module names" modules
|
||||||
it "quotes" quotes
|
it "quotes" quotes
|
||||||
|
it "expressions" expressions
|
||||||
it "numeric entity" numericEntity
|
it "numeric entity" numericEntity
|
||||||
it "ignores nesting" ignoreNesting
|
it "ignores nesting" ignoreNesting
|
||||||
|
|
||||||
|
|
@ -91,8 +92,8 @@ anchor =
|
||||||
`shouldLexTo` [ (1, 1, Anchor "myAnchor")
|
`shouldLexTo` [ (1, 1, Anchor "myAnchor")
|
||||||
]
|
]
|
||||||
|
|
||||||
math :: IO ()
|
maths :: IO ()
|
||||||
math = do
|
maths = do
|
||||||
"\\[some math\\]"
|
"\\[some math\\]"
|
||||||
`shouldLexTo` [ (1, 1, MathMultilineOpen)
|
`shouldLexTo` [ (1, 1, MathMultilineOpen)
|
||||||
, (1, 3, Token "some math")
|
, (1, 3, Token "some math")
|
||||||
|
|
@ -128,10 +129,35 @@ ignoreNesting =
|
||||||
]
|
]
|
||||||
|
|
||||||
birdTracks :: Expectation
|
birdTracks :: Expectation
|
||||||
birdTracks =
|
birdTracks = do
|
||||||
">> code"
|
"> code line"
|
||||||
`shouldLexTo` [ (1, 1, BirdTrack)
|
`shouldLexTo` [ (1, 1, BirdTrack)
|
||||||
, (1, 4, Token "code")
|
, (1, 3, Token "code line")
|
||||||
|
]
|
||||||
|
" > not code"
|
||||||
|
`shouldLexTo` [ (1, 1, Space)
|
||||||
|
, (1, 2, Token ">")
|
||||||
|
, (1, 3, Space)
|
||||||
|
, (1, 4, Token "not")
|
||||||
|
, (1, 7, Space)
|
||||||
|
, (1, 8, Token "code")
|
||||||
|
]
|
||||||
|
|
||||||
|
expressions :: Expectation
|
||||||
|
expressions = do
|
||||||
|
">>> eval this"
|
||||||
|
`shouldLexTo` [ (1, 1, Expression)
|
||||||
|
, (1, 5, Token "eval this")
|
||||||
|
]
|
||||||
|
" >>> not eval this"
|
||||||
|
`shouldLexTo` [ (1, 1, Space)
|
||||||
|
, (1, 2, Token ">>>")
|
||||||
|
, (1, 5, Space)
|
||||||
|
, (1, 6, Token "not")
|
||||||
|
, (1, 9, Space)
|
||||||
|
, (1, 10, Token "eval")
|
||||||
|
, (1, 14, Space)
|
||||||
|
, (1, 15, Token "this")
|
||||||
]
|
]
|
||||||
|
|
||||||
quotes :: Expectation
|
quotes :: Expectation
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue