Embed Anchor content inside token
This commit is contained in:
parent
82e1c76fe7
commit
8887476626
3 changed files with 311 additions and 313 deletions
385
src/Lexer.hs
385
src/Lexer.hs
|
|
@ -1,9 +1,9 @@
|
|||
{-# LANGUAGE OverloadedStrings #-}
|
||||
|
||||
module Lexer (
|
||||
Token (..),
|
||||
lexer,
|
||||
emphasis,
|
||||
Token (..),
|
||||
lexer,
|
||||
emphasis,
|
||||
) where
|
||||
|
||||
import Control.Monad (mfilter, void)
|
||||
|
|
@ -21,116 +21,117 @@ type LocatedToken = (SourcePos, Token)
|
|||
type Lexer = Parser [LocatedToken]
|
||||
|
||||
data Level
|
||||
= One
|
||||
| Two
|
||||
| Three
|
||||
| Four
|
||||
| Five
|
||||
| Six
|
||||
deriving (Eq, Show)
|
||||
= One
|
||||
| Two
|
||||
| Three
|
||||
| Four
|
||||
| Five
|
||||
| Six
|
||||
deriving (Eq, Show)
|
||||
|
||||
data Token
|
||||
= Token Text
|
||||
| Anchor
|
||||
| BirdTrack
|
||||
| BoldOpen
|
||||
| BoldClose
|
||||
| Escape
|
||||
| EmphasisOpen
|
||||
| EmphasisClose
|
||||
| Header Level
|
||||
| MonospaceOpen
|
||||
| MonospaceClose
|
||||
| Newline
|
||||
| LinkOpen
|
||||
| LinkClose
|
||||
| LabeledLinkOpen
|
||||
| LabeledLinkClose
|
||||
| ParenOpen
|
||||
| ParenClose
|
||||
| BracketOpen
|
||||
| BracketClose
|
||||
| MathsParenOpen
|
||||
| MathsParenClose
|
||||
| MathsBracketOpen
|
||||
| MathsBracketClose
|
||||
| Module
|
||||
| QuoteOpen
|
||||
| QuoteClose
|
||||
| Space
|
||||
| EOF
|
||||
deriving (Eq, Show)
|
||||
= Token Text
|
||||
| Anchor Text
|
||||
| BirdTrack
|
||||
| BoldOpen
|
||||
| BoldClose
|
||||
| Escape
|
||||
| EmphasisOpen
|
||||
| EmphasisClose
|
||||
| Header Level
|
||||
| MonospaceOpen
|
||||
| MonospaceClose
|
||||
| Newline
|
||||
| LinkOpen
|
||||
| LinkClose
|
||||
| LabeledLinkOpen
|
||||
| LabeledLinkClose
|
||||
| ParenOpen
|
||||
| ParenClose
|
||||
| BracketOpen
|
||||
| BracketClose
|
||||
| MathsParenOpen
|
||||
| MathsParenClose
|
||||
| MathsBracketOpen
|
||||
| MathsBracketClose
|
||||
| NumericEntity Int
|
||||
| Module
|
||||
| QuoteOpen
|
||||
| QuoteClose
|
||||
| Space
|
||||
| EOF
|
||||
deriving (Eq, Show)
|
||||
|
||||
lexer :: String -> Either ParseError [LocatedToken]
|
||||
lexer = Parsec.runParser lexText initialParserState "input" . Text.pack
|
||||
|
||||
lexText :: Parser [LocatedToken]
|
||||
lexText = go
|
||||
where
|
||||
go = do
|
||||
Parsec.optionMaybe Parsec.eof >>= \case
|
||||
Just _ -> pure []
|
||||
Nothing -> do
|
||||
toks <-
|
||||
choice $
|
||||
Parsec.try
|
||||
<$> [ mathsBracket
|
||||
, mathsParens
|
||||
, escape -- maths go before escape to avoid mismatch
|
||||
, headers
|
||||
, newlineToken
|
||||
, spaceToken
|
||||
, link
|
||||
, labeledLink
|
||||
, modules
|
||||
, anchors
|
||||
, textElement
|
||||
, quotes
|
||||
, birdTrack
|
||||
, other
|
||||
]
|
||||
rest <- go
|
||||
pure (toks <> rest)
|
||||
where
|
||||
go = do
|
||||
Parsec.optionMaybe Parsec.eof >>= \case
|
||||
Just _ -> pure []
|
||||
Nothing -> do
|
||||
toks <-
|
||||
choice $
|
||||
Parsec.try
|
||||
<$> [ mathsBracket
|
||||
, mathsParens
|
||||
, escape -- maths go before escape to avoid mismatch
|
||||
, headers
|
||||
, newlineToken
|
||||
, spaceToken
|
||||
, link
|
||||
, labeledLink
|
||||
, modules
|
||||
, anchors
|
||||
, textElement
|
||||
, quotes
|
||||
, birdTrack
|
||||
, other
|
||||
]
|
||||
rest <- go
|
||||
pure (toks <> rest)
|
||||
|
||||
-- Tokens
|
||||
|
||||
textElement :: Parser [LocatedToken]
|
||||
textElement =
|
||||
choice $
|
||||
Parsec.try
|
||||
<$> [ emphasis
|
||||
, bold
|
||||
, monospace
|
||||
]
|
||||
choice $
|
||||
Parsec.try
|
||||
<$> [ emphasis
|
||||
, bold
|
||||
, monospace
|
||||
]
|
||||
|
||||
headers :: Parser [LocatedToken]
|
||||
headers =
|
||||
choice $
|
||||
Parsec.try
|
||||
<$> [ header1
|
||||
, header2
|
||||
, header3
|
||||
, header4
|
||||
, header5
|
||||
, header6
|
||||
]
|
||||
choice $
|
||||
Parsec.try
|
||||
<$> [ header1
|
||||
, header2
|
||||
, header3
|
||||
, header4
|
||||
, header5
|
||||
, header6
|
||||
]
|
||||
|
||||
delimitedMaybe :: Parser a -> Parser a -> Token -> Maybe Token -> Parser [LocatedToken]
|
||||
delimitedMaybe openMark closeMark openToken closeToken = do
|
||||
openPos <- getPosition
|
||||
void openMark
|
||||
tokenPos <- getPosition
|
||||
content <- anyUntil closeMark
|
||||
closePos <- getPosition
|
||||
void closeMark
|
||||
openPos <- getPosition
|
||||
void openMark
|
||||
tokenPos <- getPosition
|
||||
content <- anyUntil closeMark
|
||||
closePos <- getPosition
|
||||
void closeMark
|
||||
|
||||
let openTok :: LocatedToken = (openPos, openToken)
|
||||
res :: LocatedToken = (tokenPos, Token content)
|
||||
closeToks :: [LocatedToken] = case closeToken of
|
||||
Just close -> [(closePos, close)]
|
||||
Nothing -> []
|
||||
let openTok :: LocatedToken = (openPos, openToken)
|
||||
res :: LocatedToken = (tokenPos, Token content)
|
||||
closeToks :: [LocatedToken] = case closeToken of
|
||||
Just close -> [(closePos, close)]
|
||||
Nothing -> []
|
||||
|
||||
pure $ [openTok, res] <> closeToks
|
||||
pure $ [openTok, res] <> closeToks
|
||||
|
||||
anyUntil :: Parser a -> Parser Text
|
||||
anyUntil p = Text.pack <$> manyTill anyChar (lookAhead p)
|
||||
|
|
@ -165,99 +166,91 @@ header6 = delimitedMaybe (void $ "====== ") eol (Header Six) Nothing
|
|||
-- #anchors#
|
||||
anchors :: Lexer
|
||||
anchors = do
|
||||
pos <- getPosition
|
||||
void $ try anchor'
|
||||
pos' <- getPosition
|
||||
txt <- anyUntil anchor'
|
||||
pos'' <- getPosition
|
||||
void $ try anchor'
|
||||
pos <- getPosition
|
||||
void $ try anchor'
|
||||
txt <- anyUntil anchor'
|
||||
void $ try anchor'
|
||||
|
||||
pure
|
||||
[ (pos, Anchor)
|
||||
, (pos', Token txt)
|
||||
, (pos'', Anchor)
|
||||
]
|
||||
where
|
||||
anchor' = (string "#" <|> string "\\#")
|
||||
pure [(pos, Anchor txt)]
|
||||
where
|
||||
anchor' = (string "#" <|> string "\\#")
|
||||
|
||||
-- "Module.Name"
|
||||
-- "Module.Name#anchor"
|
||||
-- "Module.Name#anchor"
|
||||
modules :: Lexer
|
||||
modules = do
|
||||
pos <- getPosition
|
||||
void $ char '"'
|
||||
pos' <- getPosition
|
||||
modName <- modId
|
||||
anch <- option [] do
|
||||
pos'' <- getPosition
|
||||
void $ try (string "#" <|> string "\\#")
|
||||
pos''' <- getPosition
|
||||
a <- Text.pack <$> many (satisfy (\c -> c /= '"' && not (isSpace c)))
|
||||
pure [(pos'', Anchor), (pos''', Token a)]
|
||||
pos <- getPosition
|
||||
void $ char '"'
|
||||
pos' <- getPosition
|
||||
modName <- modId
|
||||
anch <- option [] do
|
||||
pos'' <- getPosition
|
||||
void $ try (string "#" <|> string "\\#")
|
||||
a <- Text.pack <$> many (satisfy (\c -> c /= '"' && not (isSpace c)))
|
||||
pure [(pos'', Anchor a)]
|
||||
|
||||
void $ char '"'
|
||||
pure $ [(pos, Module), (pos', Token modName)] <> anch
|
||||
where
|
||||
modId = intercalate "." <$> (fmap Text.pack <$> (conId `sepBy1` (char '.')))
|
||||
void $ char '"'
|
||||
pure $ [(pos, Module), (pos', Token modName)] <> anch
|
||||
where
|
||||
modId = intercalate "." <$> (fmap Text.pack <$> (conId `sepBy1` (char '.')))
|
||||
|
||||
conId :: Parser String
|
||||
conId =
|
||||
(:)
|
||||
<$> satisfy (\c -> isAlpha c && isUpper c)
|
||||
<*> many1 conChar
|
||||
conId :: Parser String
|
||||
conId =
|
||||
(:)
|
||||
<$> satisfy (\c -> isAlpha c && isUpper c)
|
||||
<*> many1 conChar
|
||||
|
||||
conChar :: Parser Char
|
||||
conChar = satisfy (\c -> isAlphaNum c || c == '_')
|
||||
conChar :: Parser Char
|
||||
conChar = satisfy (\c -> isAlphaNum c || c == '_')
|
||||
|
||||
linkRaw :: Lexer
|
||||
linkRaw = do
|
||||
pos1 <- getPosition
|
||||
void $ string "["
|
||||
pos2 <- getPosition
|
||||
text <- anyUntil $ Text.pack <$> string "]"
|
||||
pos3 <- getPosition
|
||||
void $ "]"
|
||||
pos4 <- getPosition
|
||||
void $ "("
|
||||
pos5 <- getPosition
|
||||
link' <- anyUntil $ Text.pack <$> string ")"
|
||||
pos6 <- getPosition
|
||||
void $ ")"
|
||||
pos1 <- getPosition
|
||||
void $ string "["
|
||||
pos2 <- getPosition
|
||||
text <- anyUntil $ Text.pack <$> string "]"
|
||||
pos3 <- getPosition
|
||||
void $ "]"
|
||||
pos4 <- getPosition
|
||||
void $ "("
|
||||
pos5 <- getPosition
|
||||
link' <- anyUntil $ Text.pack <$> string ")"
|
||||
pos6 <- getPosition
|
||||
void $ ")"
|
||||
|
||||
pure $
|
||||
[ (pos1, BracketOpen)
|
||||
, (pos2, Token text)
|
||||
, (pos3, BracketClose)
|
||||
, (pos4, ParenOpen)
|
||||
, (pos5, Token link')
|
||||
, (pos6, ParenClose)
|
||||
]
|
||||
pure $
|
||||
[ (pos1, BracketOpen)
|
||||
, (pos2, Token text)
|
||||
, (pos3, BracketClose)
|
||||
, (pos4, ParenOpen)
|
||||
, (pos5, Token link')
|
||||
, (pos6, ParenClose)
|
||||
]
|
||||
|
||||
link :: Lexer
|
||||
link = do
|
||||
pos <- getPosition
|
||||
l <- linkRaw
|
||||
-- "unconsume" the last token
|
||||
pos' <- flip incSourceColumn (-1) <$> getPosition
|
||||
pure $ (pos, LinkOpen) : l <> [(pos', LinkClose)]
|
||||
pos <- getPosition
|
||||
l <- linkRaw
|
||||
-- "unconsume" the last token
|
||||
pos' <- flip incSourceColumn (-1) <$> getPosition
|
||||
pure $ (pos, LinkOpen) : l <> [(pos', LinkClose)]
|
||||
|
||||
labeledLink :: Lexer
|
||||
labeledLink = do
|
||||
pos <- getPosition
|
||||
void $ string "<"
|
||||
link' <- linkRaw
|
||||
pos7 <- getPosition
|
||||
label' <- anyUntil $ string ">"
|
||||
pos8 <- getPosition
|
||||
void $ ">"
|
||||
pos <- getPosition
|
||||
void $ string "<"
|
||||
link' <- linkRaw
|
||||
pos7 <- getPosition
|
||||
label' <- anyUntil $ string ">"
|
||||
pos8 <- getPosition
|
||||
void $ ">"
|
||||
|
||||
pure $
|
||||
(pos, LabeledLinkOpen)
|
||||
: link'
|
||||
<> [ (pos7, Token label')
|
||||
, (pos8, LabeledLinkClose)
|
||||
]
|
||||
pure $
|
||||
(pos, LabeledLinkOpen)
|
||||
: link'
|
||||
<> [ (pos7, Token label')
|
||||
, (pos8, LabeledLinkClose)
|
||||
]
|
||||
|
||||
mathsBracket :: Lexer
|
||||
mathsBracket = delimited (void $ "\\[") (void "\\]") MathsBracketOpen MathsBracketClose
|
||||
|
|
@ -285,23 +278,23 @@ monospace = delimitedSymmetric "@" MonospaceOpen MonospaceClose
|
|||
|
||||
other :: Lexer
|
||||
other = do
|
||||
pos <- getPosition
|
||||
c <- takeWhile1_ isUnicodeAlphaNum
|
||||
pure . pure $ (pos, Token c)
|
||||
where
|
||||
isUnicodeAlphaNum c = isPrint c && not (isControl c) && not (isSpace c)
|
||||
pos <- getPosition
|
||||
c <- takeWhile1_ isUnicodeAlphaNum
|
||||
pure . pure $ (pos, Token c)
|
||||
where
|
||||
isUnicodeAlphaNum c = isPrint c && not (isControl c) && not (isSpace c)
|
||||
|
||||
spaceToken :: Lexer
|
||||
spaceToken = do
|
||||
pos <- getPosition
|
||||
_ <- many1 (char ' ')
|
||||
pure . pure $ (pos, Space)
|
||||
pos <- getPosition
|
||||
_ <- many1 (char ' ')
|
||||
pure . pure $ (pos, Space)
|
||||
|
||||
newlineToken :: Lexer
|
||||
newlineToken = do
|
||||
pos <- getPosition
|
||||
_ <- newline
|
||||
pure . pure $ (pos, Newline)
|
||||
pos <- getPosition
|
||||
_ <- newline
|
||||
pure . pure $ (pos, Newline)
|
||||
|
||||
-------
|
||||
-- Helpers
|
||||
|
|
@ -310,11 +303,11 @@ newlineToken = do
|
|||
-- | Like `takeWhile`, but unconditionally take escaped characters.
|
||||
takeWhile_ :: (Char -> Bool) -> Parser Text
|
||||
takeWhile_ p = scan p_ False
|
||||
where
|
||||
p_ escaped c
|
||||
| escaped = Just False
|
||||
| not $ p c = Nothing
|
||||
| otherwise = Just (c == '\\')
|
||||
where
|
||||
p_ escaped c
|
||||
| escaped = Just False
|
||||
| not $ p c = Nothing
|
||||
| otherwise = Just (c == '\\')
|
||||
|
||||
-- | Like 'takeWhile1', but unconditionally take escaped characters.
|
||||
takeWhile1_ :: (Char -> Bool) -> Parser Text
|
||||
|
|
@ -324,19 +317,19 @@ takeWhile1_ = mfilter (not . Text.null) . takeWhile_
|
|||
function returns true.
|
||||
-}
|
||||
scan ::
|
||||
-- | scan function
|
||||
(state -> Char -> Maybe state) ->
|
||||
-- | initial state
|
||||
state ->
|
||||
Parser Text
|
||||
-- | scan function
|
||||
(state -> Char -> Maybe state) ->
|
||||
-- | initial state
|
||||
state ->
|
||||
Parser Text
|
||||
scan f initState = do
|
||||
parserState@State{stateInput = input, statePos = pos} <- Parsec.getParserState
|
||||
(remaining, finalPos, ct) <- go input initState pos 0
|
||||
let newState = parserState{stateInput = remaining, statePos = finalPos}
|
||||
Parsec.setParserState newState $> Text.take ct input
|
||||
where
|
||||
go !input' !st !posAccum !count' = case Text.uncons input' of
|
||||
Nothing -> pure (input', posAccum, count')
|
||||
Just (char', input'') -> case f st char' of
|
||||
Nothing -> pure (input', posAccum, count')
|
||||
Just st' -> go input'' st' (updatePosChar posAccum char') (count' + 1)
|
||||
parserState@State{stateInput = input, statePos = pos} <- Parsec.getParserState
|
||||
(remaining, finalPos, ct) <- go input initState pos 0
|
||||
let newState = parserState{stateInput = remaining, statePos = finalPos}
|
||||
Parsec.setParserState newState $> Text.take ct input
|
||||
where
|
||||
go !input' !st !posAccum !count' = case Text.uncons input' of
|
||||
Nothing -> pure (input', posAccum, count')
|
||||
Just (char', input'') -> case f st char' of
|
||||
Nothing -> pure (input', posAccum, count')
|
||||
Just st' -> go input'' st' (updatePosChar posAccum char') (count' + 1)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue