Applied more formatting; added Makefile (#4)

Reviewed-on: #4
Co-authored-by: Igor Ranieri <igor@elland.me>
Co-committed-by: Igor Ranieri <igor@elland.me>
This commit is contained in:
Igor Ranieri 2025-09-27 07:51:09 +00:00 committed by elland
parent f4912d3339
commit d8ba47a8b6
4 changed files with 285 additions and 274 deletions

View file

@ -1,10 +1,11 @@
{-# LANGUAGE OverloadedStrings #-}
module Lexer (
Token (..),
lexer,
emphasis,
) where
Token (..),
lexer,
emphasis,
)
where
import Control.Monad (mfilter, void)
import Data.Functor (($>))
@ -17,51 +18,52 @@ import Text.Parsec qualified as Parsec
import Text.Parsec.Pos (updatePosChar)
type Located a = (SourcePos, a)
type LocatedToken = (SourcePos, Token)
type Lexer = Parser [LocatedToken]
data Level
= One
| Two
| Three
| Four
| Five
| Six
deriving (Eq, Show)
= One
| Two
| Three
| Four
| Five
| Six
deriving (Eq, Show)
data Token
= Token Text
| Anchor Text
| BirdTrack
| BoldOpen
| BoldClose
| Escape
| EmphasisOpen
| EmphasisClose
| Header Level
| MonospaceOpen
| MonospaceClose
| Newline
| LinkOpen
| LinkClose
| LabeledLinkOpen
| LabeledLinkClose
| ParenOpen
| ParenClose
| BracketOpen
| BracketClose
| MathInlineOpen
| MathInlineClose
| MathMultilineOpen
| MathMultilineClose
| NumericEntity Int
| Module Text
| QuoteOpen
| QuoteClose
| Space
| EOF
deriving (Eq, Show)
= Token Text
| Anchor Text
| BirdTrack
| BoldOpen
| BoldClose
| Escape
| EmphasisOpen
| EmphasisClose
| Header Level
| MonospaceOpen
| MonospaceClose
| Newline
| LinkOpen
| LinkClose
| LabeledLinkOpen
| LabeledLinkClose
| ParenOpen
| ParenClose
| BracketOpen
| BracketClose
| MathInlineOpen
| MathInlineClose
| MathMultilineOpen
| MathMultilineClose
| NumericEntity Int
| Module Text
| QuoteOpen
| QuoteClose
| Space
| EOF
deriving (Eq, Show)
located :: Parser a -> Parser (SourcePos, a)
located p = (,) <$> getPosition <*> p
@ -74,74 +76,74 @@ lexer = Parsec.runParser lexText initialParserState "input" . Text.pack
lexText :: Parser [LocatedToken]
lexText = go
where
go = do
Parsec.optionMaybe Parsec.eof >>= \case
Just _ -> pure []
Nothing -> do
toks <-
choice $
Parsec.try
<$> [ mathMultiline
, mathInline
, escape -- maths go before escape to avoid mismatch
, headers
, newlineToken
, spaceToken
, link
, labeledLink
, module_
, anchor
, textElement
, quotes
, birdTrack
, other
]
rest <- go
pure (toks <> rest)
where
go = do
Parsec.optionMaybe Parsec.eof >>= \case
Just _ -> pure []
Nothing -> do
toks <-
choice $
Parsec.try
<$> [ mathMultiline
, mathInline
, escape -- maths go before escape to avoid mismatch
, headers
, newlineToken
, spaceToken
, link
, labeledLink
, module_
, anchor
, textElement
, quotes
, birdTrack
, other
]
rest <- go
pure (toks <> rest)
-- Tokens
textElement :: Parser [LocatedToken]
textElement =
choice $
Parsec.try
<$> [ emphasis
, bold
, monospace
]
choice $
Parsec.try
<$> [ emphasis
, bold
, monospace
]
headers :: Parser [LocatedToken]
headers =
choice $
Parsec.try
<$> [ header1
, header2
, header3
, header4
, header5
, header6
]
choice $
Parsec.try
<$> [ header1
, header2
, header3
, header4
, header5
, header6
]
anyUntil :: Parser a -> Parser Text
anyUntil p = Text.pack <$> manyTill anyChar (lookAhead p)
delimitedAsTuple :: Parser open -> Parser close -> Parser (Located open, LocatedToken, Located close)
delimitedAsTuple openP closeP =
(,,)
<$> located openP
<*> located (Token <$> anyUntil closeP)
<*> located closeP
(,,)
<$> located openP
<*> located (Token <$> anyUntil closeP)
<*> located closeP
delimited :: Parser open -> Parser close -> Token -> Token -> Parser [LocatedToken]
delimited openP closeP openTok closeTok = asList <$> delimitedAsTuple (openTok <$ openP) (closeTok <$ closeP)
where
asList (a, tok, b) = [a, tok, b]
where
asList (a, tok, b) = [a, tok, b]
delimitedNoTrailing :: Parser open -> Parser close -> Token -> Parser [LocatedToken]
delimitedNoTrailing openP closeP openTok = asList <$> delimitedAsTuple (openTok <$ openP) (void closeP)
where
asList (a, tok, _) = [a, tok]
where
asList (a, tok, _) = [a, tok]
delimitedSymmetric :: Parser a -> Token -> Token -> Parser [LocatedToken]
delimitedSymmetric s t1 t2 = delimited s s t1 t2
@ -170,9 +172,8 @@ header6 = delimitedNoTrailing "====== " eol (Header Six)
-- #anchors#
anchor :: Lexer
anchor = do
x <- located $ between "#" "#" (Anchor <$> anyUntil "#")
pure [x]
x <- located $ between "#" "#" (Anchor <$> anyUntil "#")
pure [x]
moduleNames :: Parser Text
moduleNames = intercalate "." . fmap Text.pack <$> upperId `sepBy1` char '.'
@ -188,47 +189,47 @@ identifierChar = satisfy (\c -> isAlphaNum c || c == '_')
-- "Module.Name\#anchor" -- known as "old anchor". this has been deprecated for 9 years, thanks Ben
module_ :: Lexer
module_ = between (char '"') (char '"') inner
where
inner = do
m <- located $ Module <$> moduleNames
mAnchor <- optionMaybe (located $ anchorHash *> (Anchor <$> anchorText))
pure $ case mAnchor of
Just anc -> [m, anc]
Nothing -> [m]
where
inner = do
m <- located $ Module <$> moduleNames
mAnchor <- optionMaybe (located $ anchorHash *> (Anchor <$> anchorText))
pure $ case mAnchor of
Just anc -> [m, anc]
Nothing -> [m]
anchorHash :: Parser Text
anchorHash = "#" <|> try "\\#"
anchorHash :: Parser Text
anchorHash = "#" <|> try "\\#"
anchorText :: Parser Text
anchorText = Text.pack <$> many (satisfy (\c -> c /= '"' && not (isSpace c)))
anchorText :: Parser Text
anchorText = Text.pack <$> many (satisfy (\c -> c /= '"' && not (isSpace c)))
linkRaw :: Lexer
linkRaw =
tokenise
[ BracketOpen <$ "["
, Token <$> anyUntil "]"
, BracketClose <$ "]"
, ParenOpen <$ "("
, Token <$> anyUntil ")"
, ParenClose <$ ")"
]
tokenise
[ BracketOpen <$ "["
, Token <$> anyUntil "]"
, BracketClose <$ "]"
, ParenOpen <$ "("
, Token <$> anyUntil ")"
, ParenClose <$ ")"
]
link :: Lexer
link = do
pos <- getPosition
l <- linkRaw
-- register the position of the last token
pos' <- flip incSourceColumn (-1) <$> getPosition
pure $ (pos, LinkOpen) : l <> [(pos', LinkClose)]
pos <- getPosition
l <- linkRaw
-- register the position of the last token
pos' <- flip incSourceColumn (-1) <$> getPosition
pure $ (pos, LinkOpen) : l <> [(pos', LinkClose)]
labeledLink :: Lexer
labeledLink = do
open <- located $ LabeledLinkOpen <$ "<"
linkRes <- linkRaw
labelRes <- located $ Token <$> anyUntil ">"
close <- located $ LabeledLinkClose <$ ">"
pure $
open : linkRes <> [ labelRes , close ]
open <- located $ LabeledLinkOpen <$ "<"
linkRes <- linkRaw
labelRes <- located $ Token <$> anyUntil ">"
close <- located $ LabeledLinkClose <$ ">"
pure $
open : linkRes <> [labelRes, close]
mathMultiline :: Lexer
mathMultiline = delimited "\\[" "\\]" MathMultilineOpen MathMultilineClose
@ -256,23 +257,23 @@ monospace = delimitedSymmetric "@" MonospaceOpen MonospaceClose
other :: Lexer
other = do
pos <- getPosition
c <- takeWhile1_ isUnicodeAlphaNum
pure . pure $ (pos, Token c)
where
isUnicodeAlphaNum c = isPrint c && not (isControl c) && not (isSpace c)
pos <- getPosition
c <- takeWhile1_ isUnicodeAlphaNum
pure . pure $ (pos, Token c)
where
isUnicodeAlphaNum c = isPrint c && not (isControl c) && not (isSpace c)
spaceToken :: Lexer
spaceToken = do
pos <- getPosition
_ <- many1 (char ' ')
pure . pure $ (pos, Space)
pos <- getPosition
_ <- many1 (char ' ')
pure . pure $ (pos, Space)
newlineToken :: Lexer
newlineToken = do
pos <- getPosition
_ <- newline
pure . pure $ (pos, Newline)
pos <- getPosition
_ <- newline
pure . pure $ (pos, Newline)
-------
-- Helpers
@ -281,11 +282,11 @@ newlineToken = do
-- | Like `takeWhile`, but unconditionally take escaped characters.
takeWhile_ :: (Char -> Bool) -> Parser Text
takeWhile_ p = scan p_ False
where
p_ escaped c
| escaped = Just False
| not $ p c = Nothing
| otherwise = Just (c == '\\')
where
p_ escaped c
| escaped = Just False
| not $ p c = Nothing
| otherwise = Just (c == '\\')
-- | Like 'takeWhile1', but unconditionally take escaped characters.
takeWhile1_ :: (Char -> Bool) -> Parser Text
@ -295,19 +296,20 @@ takeWhile1_ = mfilter (not . Text.null) . takeWhile_
function returns true.
-}
scan ::
-- | scan function
(state -> Char -> Maybe state) ->
-- | initial state
state ->
Parser Text
-- | scan function
(state -> Char -> Maybe state) ->
-- | initial state
state ->
Parser Text
scan f initState = do
parserState@State{stateInput = input, statePos = pos} <- Parsec.getParserState
(remaining, finalPos, ct) <- go input initState pos 0
let newState = parserState{stateInput = remaining, statePos = finalPos}
Parsec.setParserState newState $> Text.take ct input
where
go !input' !st !posAccum !count' = case Text.uncons input' of
Nothing -> pure (input', posAccum, count')
Just (char', input'') -> case f st char' of
Nothing -> pure (input', posAccum, count')
Just st' -> go input'' st' (updatePosChar posAccum char') (count' + 1)
parserState@State{stateInput = input, statePos = pos} <- Parsec.getParserState
(remaining, finalPos, ct) <- go input initState pos 0
let newState = parserState{stateInput = remaining, statePos = finalPos}
Parsec.setParserState newState $> Text.take ct input
where
go !input' !st !posAccum !count' = case Text.uncons input' of
Nothing -> pure (input', posAccum, count')
Just (char', input'') -> case f st char' of
Nothing -> pure (input', posAccum, count')
Just st' -> go input'' st' (updatePosChar posAccum char') (count' + 1)