Applied more formatting; added Makefile (#4)
Reviewed-on: #4 Co-authored-by: Igor Ranieri <igor@elland.me> Co-committed-by: Igor Ranieri <igor@elland.me>
This commit is contained in:
parent
f4912d3339
commit
d8ba47a8b6
4 changed files with 285 additions and 274 deletions
310
src/Lexer.hs
310
src/Lexer.hs
|
|
@ -1,10 +1,11 @@
|
|||
{-# LANGUAGE OverloadedStrings #-}
|
||||
|
||||
module Lexer (
|
||||
Token (..),
|
||||
lexer,
|
||||
emphasis,
|
||||
) where
|
||||
Token (..),
|
||||
lexer,
|
||||
emphasis,
|
||||
)
|
||||
where
|
||||
|
||||
import Control.Monad (mfilter, void)
|
||||
import Data.Functor (($>))
|
||||
|
|
@ -17,51 +18,52 @@ import Text.Parsec qualified as Parsec
|
|||
import Text.Parsec.Pos (updatePosChar)
|
||||
|
||||
type Located a = (SourcePos, a)
|
||||
|
||||
type LocatedToken = (SourcePos, Token)
|
||||
|
||||
type Lexer = Parser [LocatedToken]
|
||||
|
||||
data Level
|
||||
= One
|
||||
| Two
|
||||
| Three
|
||||
| Four
|
||||
| Five
|
||||
| Six
|
||||
deriving (Eq, Show)
|
||||
= One
|
||||
| Two
|
||||
| Three
|
||||
| Four
|
||||
| Five
|
||||
| Six
|
||||
deriving (Eq, Show)
|
||||
|
||||
data Token
|
||||
= Token Text
|
||||
| Anchor Text
|
||||
| BirdTrack
|
||||
| BoldOpen
|
||||
| BoldClose
|
||||
| Escape
|
||||
| EmphasisOpen
|
||||
| EmphasisClose
|
||||
| Header Level
|
||||
| MonospaceOpen
|
||||
| MonospaceClose
|
||||
| Newline
|
||||
| LinkOpen
|
||||
| LinkClose
|
||||
| LabeledLinkOpen
|
||||
| LabeledLinkClose
|
||||
| ParenOpen
|
||||
| ParenClose
|
||||
| BracketOpen
|
||||
| BracketClose
|
||||
| MathInlineOpen
|
||||
| MathInlineClose
|
||||
| MathMultilineOpen
|
||||
| MathMultilineClose
|
||||
| NumericEntity Int
|
||||
| Module Text
|
||||
| QuoteOpen
|
||||
| QuoteClose
|
||||
| Space
|
||||
| EOF
|
||||
deriving (Eq, Show)
|
||||
= Token Text
|
||||
| Anchor Text
|
||||
| BirdTrack
|
||||
| BoldOpen
|
||||
| BoldClose
|
||||
| Escape
|
||||
| EmphasisOpen
|
||||
| EmphasisClose
|
||||
| Header Level
|
||||
| MonospaceOpen
|
||||
| MonospaceClose
|
||||
| Newline
|
||||
| LinkOpen
|
||||
| LinkClose
|
||||
| LabeledLinkOpen
|
||||
| LabeledLinkClose
|
||||
| ParenOpen
|
||||
| ParenClose
|
||||
| BracketOpen
|
||||
| BracketClose
|
||||
| MathInlineOpen
|
||||
| MathInlineClose
|
||||
| MathMultilineOpen
|
||||
| MathMultilineClose
|
||||
| NumericEntity Int
|
||||
| Module Text
|
||||
| QuoteOpen
|
||||
| QuoteClose
|
||||
| Space
|
||||
| EOF
|
||||
deriving (Eq, Show)
|
||||
|
||||
located :: Parser a -> Parser (SourcePos, a)
|
||||
located p = (,) <$> getPosition <*> p
|
||||
|
|
@ -74,74 +76,74 @@ lexer = Parsec.runParser lexText initialParserState "input" . Text.pack
|
|||
|
||||
lexText :: Parser [LocatedToken]
|
||||
lexText = go
|
||||
where
|
||||
go = do
|
||||
Parsec.optionMaybe Parsec.eof >>= \case
|
||||
Just _ -> pure []
|
||||
Nothing -> do
|
||||
toks <-
|
||||
choice $
|
||||
Parsec.try
|
||||
<$> [ mathMultiline
|
||||
, mathInline
|
||||
, escape -- maths go before escape to avoid mismatch
|
||||
, headers
|
||||
, newlineToken
|
||||
, spaceToken
|
||||
, link
|
||||
, labeledLink
|
||||
, module_
|
||||
, anchor
|
||||
, textElement
|
||||
, quotes
|
||||
, birdTrack
|
||||
, other
|
||||
]
|
||||
rest <- go
|
||||
pure (toks <> rest)
|
||||
where
|
||||
go = do
|
||||
Parsec.optionMaybe Parsec.eof >>= \case
|
||||
Just _ -> pure []
|
||||
Nothing -> do
|
||||
toks <-
|
||||
choice $
|
||||
Parsec.try
|
||||
<$> [ mathMultiline
|
||||
, mathInline
|
||||
, escape -- maths go before escape to avoid mismatch
|
||||
, headers
|
||||
, newlineToken
|
||||
, spaceToken
|
||||
, link
|
||||
, labeledLink
|
||||
, module_
|
||||
, anchor
|
||||
, textElement
|
||||
, quotes
|
||||
, birdTrack
|
||||
, other
|
||||
]
|
||||
rest <- go
|
||||
pure (toks <> rest)
|
||||
|
||||
-- Tokens
|
||||
|
||||
textElement :: Parser [LocatedToken]
|
||||
textElement =
|
||||
choice $
|
||||
Parsec.try
|
||||
<$> [ emphasis
|
||||
, bold
|
||||
, monospace
|
||||
]
|
||||
choice $
|
||||
Parsec.try
|
||||
<$> [ emphasis
|
||||
, bold
|
||||
, monospace
|
||||
]
|
||||
|
||||
headers :: Parser [LocatedToken]
|
||||
headers =
|
||||
choice $
|
||||
Parsec.try
|
||||
<$> [ header1
|
||||
, header2
|
||||
, header3
|
||||
, header4
|
||||
, header5
|
||||
, header6
|
||||
]
|
||||
choice $
|
||||
Parsec.try
|
||||
<$> [ header1
|
||||
, header2
|
||||
, header3
|
||||
, header4
|
||||
, header5
|
||||
, header6
|
||||
]
|
||||
|
||||
anyUntil :: Parser a -> Parser Text
|
||||
anyUntil p = Text.pack <$> manyTill anyChar (lookAhead p)
|
||||
|
||||
delimitedAsTuple :: Parser open -> Parser close -> Parser (Located open, LocatedToken, Located close)
|
||||
delimitedAsTuple openP closeP =
|
||||
(,,)
|
||||
<$> located openP
|
||||
<*> located (Token <$> anyUntil closeP)
|
||||
<*> located closeP
|
||||
(,,)
|
||||
<$> located openP
|
||||
<*> located (Token <$> anyUntil closeP)
|
||||
<*> located closeP
|
||||
|
||||
delimited :: Parser open -> Parser close -> Token -> Token -> Parser [LocatedToken]
|
||||
delimited openP closeP openTok closeTok = asList <$> delimitedAsTuple (openTok <$ openP) (closeTok <$ closeP)
|
||||
where
|
||||
asList (a, tok, b) = [a, tok, b]
|
||||
where
|
||||
asList (a, tok, b) = [a, tok, b]
|
||||
|
||||
delimitedNoTrailing :: Parser open -> Parser close -> Token -> Parser [LocatedToken]
|
||||
delimitedNoTrailing openP closeP openTok = asList <$> delimitedAsTuple (openTok <$ openP) (void closeP)
|
||||
where
|
||||
asList (a, tok, _) = [a, tok]
|
||||
where
|
||||
asList (a, tok, _) = [a, tok]
|
||||
|
||||
delimitedSymmetric :: Parser a -> Token -> Token -> Parser [LocatedToken]
|
||||
delimitedSymmetric s t1 t2 = delimited s s t1 t2
|
||||
|
|
@ -170,9 +172,8 @@ header6 = delimitedNoTrailing "====== " eol (Header Six)
|
|||
-- #anchors#
|
||||
anchor :: Lexer
|
||||
anchor = do
|
||||
x <- located $ between "#" "#" (Anchor <$> anyUntil "#")
|
||||
pure [x]
|
||||
|
||||
x <- located $ between "#" "#" (Anchor <$> anyUntil "#")
|
||||
pure [x]
|
||||
|
||||
moduleNames :: Parser Text
|
||||
moduleNames = intercalate "." . fmap Text.pack <$> upperId `sepBy1` char '.'
|
||||
|
|
@ -188,47 +189,47 @@ identifierChar = satisfy (\c -> isAlphaNum c || c == '_')
|
|||
-- "Module.Name\#anchor" -- known as "old anchor". this has been deprecated for 9 years, thanks Ben
|
||||
module_ :: Lexer
|
||||
module_ = between (char '"') (char '"') inner
|
||||
where
|
||||
inner = do
|
||||
m <- located $ Module <$> moduleNames
|
||||
mAnchor <- optionMaybe (located $ anchorHash *> (Anchor <$> anchorText))
|
||||
pure $ case mAnchor of
|
||||
Just anc -> [m, anc]
|
||||
Nothing -> [m]
|
||||
where
|
||||
inner = do
|
||||
m <- located $ Module <$> moduleNames
|
||||
mAnchor <- optionMaybe (located $ anchorHash *> (Anchor <$> anchorText))
|
||||
pure $ case mAnchor of
|
||||
Just anc -> [m, anc]
|
||||
Nothing -> [m]
|
||||
|
||||
anchorHash :: Parser Text
|
||||
anchorHash = "#" <|> try "\\#"
|
||||
anchorHash :: Parser Text
|
||||
anchorHash = "#" <|> try "\\#"
|
||||
|
||||
anchorText :: Parser Text
|
||||
anchorText = Text.pack <$> many (satisfy (\c -> c /= '"' && not (isSpace c)))
|
||||
anchorText :: Parser Text
|
||||
anchorText = Text.pack <$> many (satisfy (\c -> c /= '"' && not (isSpace c)))
|
||||
|
||||
linkRaw :: Lexer
|
||||
linkRaw =
|
||||
tokenise
|
||||
[ BracketOpen <$ "["
|
||||
, Token <$> anyUntil "]"
|
||||
, BracketClose <$ "]"
|
||||
, ParenOpen <$ "("
|
||||
, Token <$> anyUntil ")"
|
||||
, ParenClose <$ ")"
|
||||
]
|
||||
tokenise
|
||||
[ BracketOpen <$ "["
|
||||
, Token <$> anyUntil "]"
|
||||
, BracketClose <$ "]"
|
||||
, ParenOpen <$ "("
|
||||
, Token <$> anyUntil ")"
|
||||
, ParenClose <$ ")"
|
||||
]
|
||||
|
||||
link :: Lexer
|
||||
link = do
|
||||
pos <- getPosition
|
||||
l <- linkRaw
|
||||
-- register the position of the last token
|
||||
pos' <- flip incSourceColumn (-1) <$> getPosition
|
||||
pure $ (pos, LinkOpen) : l <> [(pos', LinkClose)]
|
||||
pos <- getPosition
|
||||
l <- linkRaw
|
||||
-- register the position of the last token
|
||||
pos' <- flip incSourceColumn (-1) <$> getPosition
|
||||
pure $ (pos, LinkOpen) : l <> [(pos', LinkClose)]
|
||||
|
||||
labeledLink :: Lexer
|
||||
labeledLink = do
|
||||
open <- located $ LabeledLinkOpen <$ "<"
|
||||
linkRes <- linkRaw
|
||||
labelRes <- located $ Token <$> anyUntil ">"
|
||||
close <- located $ LabeledLinkClose <$ ">"
|
||||
pure $
|
||||
open : linkRes <> [ labelRes , close ]
|
||||
open <- located $ LabeledLinkOpen <$ "<"
|
||||
linkRes <- linkRaw
|
||||
labelRes <- located $ Token <$> anyUntil ">"
|
||||
close <- located $ LabeledLinkClose <$ ">"
|
||||
pure $
|
||||
open : linkRes <> [labelRes, close]
|
||||
|
||||
mathMultiline :: Lexer
|
||||
mathMultiline = delimited "\\[" "\\]" MathMultilineOpen MathMultilineClose
|
||||
|
|
@ -256,23 +257,23 @@ monospace = delimitedSymmetric "@" MonospaceOpen MonospaceClose
|
|||
|
||||
other :: Lexer
|
||||
other = do
|
||||
pos <- getPosition
|
||||
c <- takeWhile1_ isUnicodeAlphaNum
|
||||
pure . pure $ (pos, Token c)
|
||||
where
|
||||
isUnicodeAlphaNum c = isPrint c && not (isControl c) && not (isSpace c)
|
||||
pos <- getPosition
|
||||
c <- takeWhile1_ isUnicodeAlphaNum
|
||||
pure . pure $ (pos, Token c)
|
||||
where
|
||||
isUnicodeAlphaNum c = isPrint c && not (isControl c) && not (isSpace c)
|
||||
|
||||
spaceToken :: Lexer
|
||||
spaceToken = do
|
||||
pos <- getPosition
|
||||
_ <- many1 (char ' ')
|
||||
pure . pure $ (pos, Space)
|
||||
pos <- getPosition
|
||||
_ <- many1 (char ' ')
|
||||
pure . pure $ (pos, Space)
|
||||
|
||||
newlineToken :: Lexer
|
||||
newlineToken = do
|
||||
pos <- getPosition
|
||||
_ <- newline
|
||||
pure . pure $ (pos, Newline)
|
||||
pos <- getPosition
|
||||
_ <- newline
|
||||
pure . pure $ (pos, Newline)
|
||||
|
||||
-------
|
||||
-- Helpers
|
||||
|
|
@ -281,11 +282,11 @@ newlineToken = do
|
|||
-- | Like `takeWhile`, but unconditionally take escaped characters.
|
||||
takeWhile_ :: (Char -> Bool) -> Parser Text
|
||||
takeWhile_ p = scan p_ False
|
||||
where
|
||||
p_ escaped c
|
||||
| escaped = Just False
|
||||
| not $ p c = Nothing
|
||||
| otherwise = Just (c == '\\')
|
||||
where
|
||||
p_ escaped c
|
||||
| escaped = Just False
|
||||
| not $ p c = Nothing
|
||||
| otherwise = Just (c == '\\')
|
||||
|
||||
-- | Like 'takeWhile1', but unconditionally take escaped characters.
|
||||
takeWhile1_ :: (Char -> Bool) -> Parser Text
|
||||
|
|
@ -295,19 +296,20 @@ takeWhile1_ = mfilter (not . Text.null) . takeWhile_
|
|||
function returns true.
|
||||
-}
|
||||
scan ::
|
||||
-- | scan function
|
||||
(state -> Char -> Maybe state) ->
|
||||
-- | initial state
|
||||
state ->
|
||||
Parser Text
|
||||
-- | scan function
|
||||
(state -> Char -> Maybe state) ->
|
||||
-- | initial state
|
||||
state ->
|
||||
Parser Text
|
||||
scan f initState = do
|
||||
parserState@State{stateInput = input, statePos = pos} <- Parsec.getParserState
|
||||
(remaining, finalPos, ct) <- go input initState pos 0
|
||||
let newState = parserState{stateInput = remaining, statePos = finalPos}
|
||||
Parsec.setParserState newState $> Text.take ct input
|
||||
where
|
||||
go !input' !st !posAccum !count' = case Text.uncons input' of
|
||||
Nothing -> pure (input', posAccum, count')
|
||||
Just (char', input'') -> case f st char' of
|
||||
Nothing -> pure (input', posAccum, count')
|
||||
Just st' -> go input'' st' (updatePosChar posAccum char') (count' + 1)
|
||||
parserState@State{stateInput = input, statePos = pos} <- Parsec.getParserState
|
||||
|
||||
(remaining, finalPos, ct) <- go input initState pos 0
|
||||
let newState = parserState{stateInput = remaining, statePos = finalPos}
|
||||
Parsec.setParserState newState $> Text.take ct input
|
||||
where
|
||||
go !input' !st !posAccum !count' = case Text.uncons input' of
|
||||
Nothing -> pure (input', posAccum, count')
|
||||
Just (char', input'') -> case f st char' of
|
||||
Nothing -> pure (input', posAccum, count')
|
||||
Just st' -> go input'' st' (updatePosChar posAccum char') (count' + 1)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue