Compare commits

..

3 commits

Author SHA1 Message Date
20b5ffac36
ref(moduleName): break into multiple smaller functions
upperId has been changed to only use isUpper because an non alphabetical
character would be false anyway
2025-09-24 20:47:55 +08:00
c956c97e0c
ref(lexer): simplify anchor 2025-09-24 20:25:48 +08:00
064e253f03
ref(lexer): simplify delimited logic 2025-09-24 20:25:14 +08:00
2 changed files with 33 additions and 38 deletions

View file

@ -56,7 +56,7 @@ data Token
| MathsBracketOpen | MathsBracketOpen
| MathsBracketClose | MathsBracketClose
| NumericEntity Int | NumericEntity Int
| Module | Module Text
| QuoteOpen | QuoteOpen
| QuoteClose | QuoteClose
| Space | Space
@ -137,14 +137,14 @@ delimitedAsTuple openP closeP =
<*> located closeP <*> located closeP
delimited :: Parser a -> Parser b -> Token -> Token -> Parser [LocatedToken] delimited :: Parser a -> Parser b -> Token -> Token -> Parser [LocatedToken]
delimited openP closeP openTok closeTok = fuse <$> delimitedAsTuple (openTok <$ openP) (closeTok <$ closeP) delimited openP closeP openTok closeTok = asList <$> delimitedAsTuple (openTok <$ openP) (closeTok <$ closeP)
where where
fuse (a, tok, b) = [a, tok, b] asList (a, tok, b) = [a, tok, b]
delimitedNoTrailing :: Parser a -> Parser b -> Token -> Parser [LocatedToken] delimitedNoTrailing :: Parser a -> Parser b -> Token -> Parser [LocatedToken]
delimitedNoTrailing openP closeP openTok = fuse <$> delimitedAsTuple (openTok <$ openP) (void closeP) delimitedNoTrailing openP closeP openTok = asList <$> delimitedAsTuple (openTok <$ openP) (void closeP)
where where
fuse (a, tok, _) = [a, tok] asList (a, tok, _) = [a, tok]
delimitedSymmetric :: Parser a -> Token -> Token -> Parser [LocatedToken] delimitedSymmetric :: Parser a -> Token -> Token -> Parser [LocatedToken]
delimitedSymmetric s t1 t2 = delimited s s t1 t2 delimitedSymmetric s t1 t2 = delimited s s t1 t2
@ -152,6 +152,9 @@ delimitedSymmetric s t1 t2 = delimited s s t1 t2
eol :: Parser () eol :: Parser ()
eol = void "\n" <|> void "\r\n" <|> Parsec.eof eol = void "\n" <|> void "\r\n" <|> Parsec.eof
anchorHash :: Parser Text
anchorHash = "#" <|> try "\\#"
header1 :: Lexer header1 :: Lexer
header1 = delimitedNoTrailing "= " eol (Header One) header1 = delimitedNoTrailing "= " eol (Header One)
@ -172,41 +175,36 @@ header6 = delimitedNoTrailing "====== " eol (Header Six)
-- #anchors# -- #anchors#
anchors :: Lexer anchors :: Lexer
anchors = do anchors =
pos <- getPosition tokenise
void $ try anchor' [ between anchorHash anchorHash (Anchor <$> anyUntil anchorHash)
txt <- anyUntil anchor' ]
void $ try anchor'
pure [(pos, Anchor txt)]
where moduleName :: Parser Text
anchor' = (string "#" <|> string "\\#") moduleName = intercalate "." . fmap Text.pack <$> upperId `sepBy1` char '.'
upperId :: Parser String
upperId = (:) <$> satisfy isUpper <*> many1 identifierChar
identifierChar :: Parser Char
identifierChar = satisfy (\c -> isAlphaNum c || c == '_')
-- "Module.Name" -- "Module.Name"
-- "Module.Name#anchor" -- "Module.Name#anchor"
-- "Module.Name\#anchor" -- this has been deprecated for 9 years, thanks Ben -- "Module.Name\#anchor" -- this has been deprecated for 9 years, thanks Ben
modules :: Lexer modules :: Lexer
modules = do modules = between (char '"') (char '"') inner
startPos <- startPosition $ char '"'
(modPos, modName) <- located modId
anch <- option [] do
anchPos <- startPosition (string "#" <|> string' "\\#")
txt <- Text.pack <$> many (satisfy (\c -> c /= '"' && not (isSpace c)))
pure [(anchPos, Anchor txt)]
void $ char '"'
pure $ [(startPos, Module), (modPos, Token modName)] <> anch
where where
modId = intercalate "." <$> (fmap Text.pack <$> (conId `sepBy1` (char '.'))) inner = do
module_ <- located $ Module <$> moduleName
mAnchor <- optionMaybe (located $ anchorHash *> (Anchor <$> anchorText))
pure $ case mAnchor of
Just anchor -> [module_, anchor]
Nothing -> [module_]
conId :: Parser String anchorText :: Parser Text
conId = anchorText = Text.pack <$> many (satisfy (\c -> c /= '"' && not (isSpace c)))
(:)
<$> satisfy (\c -> isAlpha c && isUpper c)
<*> many1 conChar
conChar :: Parser Char
conChar = satisfy (\c -> isAlphaNum c || c == '_')
linkRaw :: Lexer linkRaw :: Lexer
linkRaw = linkRaw =

View file

@ -44,19 +44,16 @@ main = hspec $ do
modules :: Expectation modules :: Expectation
modules = do modules = do
"\"MyModule.Name\"" "\"MyModule.Name\""
`shouldLexTo` [ (1, 1, Module) `shouldLexTo` [ (1, 2, Module "MyModule.Name")
, (1, 2, Token "MyModule.Name")
] ]
"\"OtherModule.Name#myAnchor\"" "\"OtherModule.Name#myAnchor\""
`shouldLexTo` [ (1, 1, Module) `shouldLexTo` [ (1, 2, Module "OtherModule.Name")
, (1, 2, Token "OtherModule.Name")
, (1, 18, Anchor "myAnchor") , (1, 18, Anchor "myAnchor")
] ]
"\"OtherModule.Name\\#myAnchor\"" "\"OtherModule.Name\\#myAnchor\""
`shouldLexTo` [ (1, 1, Module) `shouldLexTo` [ (1, 2, Module "OtherModule.Name")
, (1, 2, Token "OtherModule.Name")
, (1, 18, Anchor "myAnchor") , (1, 18, Anchor "myAnchor")
] ]
link :: Expectation link :: Expectation