Compare commits

...

3 commits

Author SHA1 Message Date
20b5ffac36
ref(moduleName): break into multiple smaller functions
upperId has been changed to only use isUpper because an non alphabetical
character would be false anyway
2025-09-24 20:47:55 +08:00
c956c97e0c
ref(lexer): simplify anchor 2025-09-24 20:25:48 +08:00
064e253f03
ref(lexer): simplify delimited logic 2025-09-24 20:25:14 +08:00
2 changed files with 56 additions and 63 deletions

View file

@ -16,6 +16,7 @@ import Text.Parsec
import Text.Parsec qualified as Parsec
import Text.Parsec.Pos (updatePosChar)
type Located a = (SourcePos, a)
type LocatedToken = (SourcePos, Token)
type Lexer = Parser [LocatedToken]
@ -55,7 +56,7 @@ data Token
| MathsBracketOpen
| MathsBracketClose
| NumericEntity Int
| Module
| Module Text
| QuoteOpen
| QuoteClose
| Space
@ -125,28 +126,25 @@ headers =
, header6
]
delimitedMaybe :: Parser a -> Parser a -> Token -> Maybe Token -> Parser [LocatedToken]
delimitedMaybe openMark closeMark openToken closeToken = do
openPos <- getPosition
void openMark
tokenPos <- getPosition
content <- anyUntil closeMark
closePos <- getPosition
void closeMark
let openTok :: LocatedToken = (openPos, openToken)
res :: LocatedToken = (tokenPos, Token content)
closeToks :: [LocatedToken] = case closeToken of
Just close -> [(closePos, close)]
Nothing -> []
pure $ [openTok, res] <> closeToks
anyUntil :: Parser a -> Parser Text
anyUntil p = Text.pack <$> manyTill anyChar (lookAhead p)
delimited :: Parser a -> Parser a -> Token -> Token -> Parser [LocatedToken]
delimited a b c d = delimitedMaybe a b c (Just d)
delimitedAsTuple :: Parser open -> Parser close -> Parser (Located open, LocatedToken, Located close)
delimitedAsTuple openP closeP =
(,,)
<$> located openP
<*> located (Token <$> anyUntil closeP)
<*> located closeP
delimited :: Parser a -> Parser b -> Token -> Token -> Parser [LocatedToken]
delimited openP closeP openTok closeTok = asList <$> delimitedAsTuple (openTok <$ openP) (closeTok <$ closeP)
where
asList (a, tok, b) = [a, tok, b]
delimitedNoTrailing :: Parser a -> Parser b -> Token -> Parser [LocatedToken]
delimitedNoTrailing openP closeP openTok = asList <$> delimitedAsTuple (openTok <$ openP) (void closeP)
where
asList (a, tok, _) = [a, tok]
delimitedSymmetric :: Parser a -> Token -> Token -> Parser [LocatedToken]
delimitedSymmetric s t1 t2 = delimited s s t1 t2
@ -154,61 +152,59 @@ delimitedSymmetric s t1 t2 = delimited s s t1 t2
eol :: Parser ()
eol = void "\n" <|> void "\r\n" <|> Parsec.eof
anchorHash :: Parser Text
anchorHash = "#" <|> try "\\#"
header1 :: Lexer
header1 = delimitedMaybe (void $ "= ") eol (Header One) Nothing
header1 = delimitedNoTrailing "= " eol (Header One)
header2 :: Lexer
header2 = delimitedMaybe (void $ "== ") eol (Header Two) Nothing
header2 = delimitedNoTrailing "== " eol (Header Two)
header3 :: Lexer
header3 = delimitedMaybe (void $ "=== ") eol (Header Three) Nothing
header3 = delimitedNoTrailing "=== " eol (Header Three)
header4 :: Lexer
header4 = delimitedMaybe (void $ "==== ") eol (Header Four) Nothing
header4 = delimitedNoTrailing "==== " eol (Header Four)
header5 :: Lexer
header5 = delimitedMaybe (void $ "===== ") eol (Header Five) Nothing
header5 = delimitedNoTrailing "===== " eol (Header Five)
header6 :: Lexer
header6 = delimitedMaybe (void $ "====== ") eol (Header Six) Nothing
header6 = delimitedNoTrailing "====== " eol (Header Six)
-- #anchors#
anchors :: Lexer
anchors = do
pos <- getPosition
void $ try anchor'
txt <- anyUntil anchor'
void $ try anchor'
anchors =
tokenise
[ between anchorHash anchorHash (Anchor <$> anyUntil anchorHash)
]
pure [(pos, Anchor txt)]
where
anchor' = (string "#" <|> string "\\#")
moduleName :: Parser Text
moduleName = intercalate "." . fmap Text.pack <$> upperId `sepBy1` char '.'
upperId :: Parser String
upperId = (:) <$> satisfy isUpper <*> many1 identifierChar
identifierChar :: Parser Char
identifierChar = satisfy (\c -> isAlphaNum c || c == '_')
-- "Module.Name"
-- "Module.Name#anchor"
-- "Module.Name\#anchor" -- this has been deprecated for 9 years, thanks Ben
modules :: Lexer
modules = do
startPos <- startPosition $ char '"'
(modPos, modName) <- located modId
anch <- option [] do
anchPos <- startPosition (string "#" <|> string' "\\#")
txt <- Text.pack <$> many (satisfy (\c -> c /= '"' && not (isSpace c)))
pure [(anchPos, Anchor txt)]
void $ char '"'
pure $ [(startPos, Module), (modPos, Token modName)] <> anch
modules = between (char '"') (char '"') inner
where
modId = intercalate "." <$> (fmap Text.pack <$> (conId `sepBy1` (char '.')))
inner = do
module_ <- located $ Module <$> moduleName
mAnchor <- optionMaybe (located $ anchorHash *> (Anchor <$> anchorText))
pure $ case mAnchor of
Just anchor -> [module_, anchor]
Nothing -> [module_]
conId :: Parser String
conId =
(:)
<$> satisfy (\c -> isAlpha c && isUpper c)
<*> many1 conChar
conChar :: Parser Char
conChar = satisfy (\c -> isAlphaNum c || c == '_')
anchorText :: Parser Text
anchorText = Text.pack <$> many (satisfy (\c -> c /= '"' && not (isSpace c)))
linkRaw :: Lexer
linkRaw =
@ -247,16 +243,16 @@ labeledLink = do
]
mathsBracket :: Lexer
mathsBracket = delimited (void $ "\\[") (void "\\]") MathsBracketOpen MathsBracketClose
mathsBracket = delimited "\\[" "\\]" MathsBracketOpen MathsBracketClose
mathsParens :: Lexer
mathsParens = delimited (void $ "\\(") (void "\\)") MathsParenOpen MathsParenClose
mathsParens = delimited "\\(" "\\)" MathsParenOpen MathsParenClose
birdTrack :: Lexer
birdTrack = delimitedMaybe (void ">> ") eol BirdTrack Nothing
birdTrack = delimitedNoTrailing ">> " eol BirdTrack
escape :: Lexer
escape = delimitedMaybe (void "\\") eol Escape Nothing
escape = delimitedNoTrailing "\\" eol Escape
quotes :: Lexer
quotes = delimitedSymmetric "\"" QuoteOpen QuoteClose

View file

@ -44,19 +44,16 @@ main = hspec $ do
modules :: Expectation
modules = do
"\"MyModule.Name\""
`shouldLexTo` [ (1, 1, Module)
, (1, 2, Token "MyModule.Name")
`shouldLexTo` [ (1, 2, Module "MyModule.Name")
]
"\"OtherModule.Name#myAnchor\""
`shouldLexTo` [ (1, 1, Module)
, (1, 2, Token "OtherModule.Name")
`shouldLexTo` [ (1, 2, Module "OtherModule.Name")
, (1, 18, Anchor "myAnchor")
]
"\"OtherModule.Name\\#myAnchor\""
`shouldLexTo` [ (1, 1, Module)
, (1, 2, Token "OtherModule.Name")
`shouldLexTo` [ (1, 2, Module "OtherModule.Name")
, (1, 18, Anchor "myAnchor")
]
link :: Expectation