Compare commits
7 commits
e9280d63f8
...
6c0b4a4288
| Author | SHA1 | Date | |
|---|---|---|---|
| 6c0b4a4288 | |||
| 75c4817166 | |||
| 7ceb9b0277 | |||
| 368e5bc9a0 | |||
| d6087ec3d6 | |||
| fdb9070e99 | |||
| 39cfe2035d |
2 changed files with 65 additions and 66 deletions
114
src/Lexer.hs
114
src/Lexer.hs
|
|
@ -16,6 +16,7 @@ import Text.Parsec
|
|||
import Text.Parsec qualified as Parsec
|
||||
import Text.Parsec.Pos (updatePosChar)
|
||||
|
||||
type Located a = (SourcePos, a)
|
||||
type LocatedToken = (SourcePos, Token)
|
||||
|
||||
type Lexer = Parser [LocatedToken]
|
||||
|
|
@ -55,7 +56,7 @@ data Token
|
|||
| MathsBracketOpen
|
||||
| MathsBracketClose
|
||||
| NumericEntity Int
|
||||
| Module
|
||||
| Module Text
|
||||
| QuoteOpen
|
||||
| QuoteClose
|
||||
| Space
|
||||
|
|
@ -125,90 +126,85 @@ headers =
|
|||
, header6
|
||||
]
|
||||
|
||||
delimitedMaybe :: Parser a -> Parser a -> Token -> Maybe Token -> Parser [LocatedToken]
|
||||
delimitedMaybe openMark closeMark openToken closeToken = do
|
||||
openPos <- getPosition
|
||||
void openMark
|
||||
tokenPos <- getPosition
|
||||
content <- anyUntil closeMark
|
||||
closePos <- getPosition
|
||||
void closeMark
|
||||
|
||||
let openTok :: LocatedToken = (openPos, openToken)
|
||||
res :: LocatedToken = (tokenPos, Token content)
|
||||
closeToks :: [LocatedToken] = case closeToken of
|
||||
Just close -> [(closePos, close)]
|
||||
Nothing -> []
|
||||
|
||||
pure $ [openTok, res] <> closeToks
|
||||
|
||||
anyUntil :: Parser a -> Parser Text
|
||||
anyUntil p = Text.pack <$> manyTill anyChar (lookAhead p)
|
||||
|
||||
delimited :: Parser a -> Parser a -> Token -> Token -> Parser [LocatedToken]
|
||||
delimited a b c d = delimitedMaybe a b c (Just d)
|
||||
delimitedAsTuple :: Parser open -> Parser close -> Parser (Located open, LocatedToken, Located close)
|
||||
delimitedAsTuple openP closeP =
|
||||
(,,)
|
||||
<$> located openP
|
||||
<*> located (Token <$> anyUntil closeP)
|
||||
<*> located closeP
|
||||
|
||||
delimited :: Parser a -> Parser b -> Token -> Token -> Parser [LocatedToken]
|
||||
delimited openP closeP openTok closeTok = asList <$> delimitedAsTuple (openTok <$ openP) (closeTok <$ closeP)
|
||||
where
|
||||
asList (a, tok, b) = [a, tok, b]
|
||||
|
||||
delimitedNoTrailing :: Parser a -> Parser b -> Token -> Parser [LocatedToken]
|
||||
delimitedNoTrailing openP closeP openTok = asList <$> delimitedAsTuple (openTok <$ openP) (void closeP)
|
||||
where
|
||||
asList (a, tok, _) = [a, tok]
|
||||
|
||||
delimitedSymmetric :: Parser a -> Token -> Token -> Parser [LocatedToken]
|
||||
delimitedSymmetric s t1 t2 = delimited s s t1 t2
|
||||
|
||||
eol :: Parser ()
|
||||
eol = void "\n" <|> Parsec.eof
|
||||
eol = void "\n" <|> void "\r\n" <|> Parsec.eof
|
||||
|
||||
anchorHash :: Parser Text
|
||||
anchorHash = "#" <|> try "\\#"
|
||||
|
||||
header1 :: Lexer
|
||||
header1 = delimitedMaybe (void $ "= ") eol (Header One) Nothing
|
||||
header1 = delimitedNoTrailing "= " eol (Header One)
|
||||
|
||||
header2 :: Lexer
|
||||
header2 = delimitedMaybe (void $ "== ") eol (Header Two) Nothing
|
||||
header2 = delimitedNoTrailing "== " eol (Header Two)
|
||||
|
||||
header3 :: Lexer
|
||||
header3 = delimitedMaybe (void $ "=== ") eol (Header Three) Nothing
|
||||
header3 = delimitedNoTrailing "=== " eol (Header Three)
|
||||
|
||||
header4 :: Lexer
|
||||
header4 = delimitedMaybe (void $ "==== ") eol (Header Four) Nothing
|
||||
header4 = delimitedNoTrailing "==== " eol (Header Four)
|
||||
|
||||
header5 :: Lexer
|
||||
header5 = delimitedMaybe (void $ "===== ") eol (Header Five) Nothing
|
||||
header5 = delimitedNoTrailing "===== " eol (Header Five)
|
||||
|
||||
header6 :: Lexer
|
||||
header6 = delimitedMaybe (void $ "====== ") eol (Header Six) Nothing
|
||||
header6 = delimitedNoTrailing "====== " eol (Header Six)
|
||||
|
||||
-- #anchors#
|
||||
anchors :: Lexer
|
||||
anchors = do
|
||||
pos <- getPosition
|
||||
void $ try anchor'
|
||||
txt <- anyUntil anchor'
|
||||
void $ try anchor'
|
||||
anchors =
|
||||
tokenise
|
||||
[ between anchorHash anchorHash (Anchor <$> anyUntil anchorHash)
|
||||
]
|
||||
|
||||
pure [(pos, Anchor txt)]
|
||||
where
|
||||
anchor' = (string "#" <|> string "\\#")
|
||||
|
||||
moduleNames :: Parser Text
|
||||
moduleNames = intercalate "." . fmap Text.pack <$> upperId `sepBy1` char '.'
|
||||
|
||||
upperId :: Parser String
|
||||
upperId = (:) <$> satisfy isUpper <*> many1 identifierChar
|
||||
|
||||
identifierChar :: Parser Char
|
||||
identifierChar = satisfy (\c -> isAlphaNum c || c == '_')
|
||||
|
||||
-- "Module.Name"
|
||||
-- "Module.Name#anchor"
|
||||
-- "Module.Name\#anchor" -- this has been deprecated for 9 years, thanks Ben
|
||||
modules :: Lexer
|
||||
modules = do
|
||||
startPos <- startPosition $ char '"'
|
||||
(modPos, modName) <- located modId
|
||||
anch <- option [] do
|
||||
anchPos <- startPosition (string "#" <|> string' "\\#")
|
||||
txt <- Text.pack <$> many (satisfy (\c -> c /= '"' && not (isSpace c)))
|
||||
pure [(anchPos, Anchor txt)]
|
||||
|
||||
void $ char '"'
|
||||
pure $ [(startPos, Module), (modPos, Token modName)] <> anch
|
||||
modules = between (char '"') (char '"') inner
|
||||
where
|
||||
modId = intercalate "." <$> (fmap Text.pack <$> (conId `sepBy1` (char '.')))
|
||||
inner = do
|
||||
module_ <- located $ Module <$> moduleNames
|
||||
mAnchor <- optionMaybe (located $ anchorHash *> (Anchor <$> anchorText))
|
||||
pure $ case mAnchor of
|
||||
Just anchor -> [module_, anchor]
|
||||
Nothing -> [module_]
|
||||
|
||||
conId :: Parser String
|
||||
conId =
|
||||
(:)
|
||||
<$> satisfy (\c -> isAlpha c && isUpper c)
|
||||
<*> many1 conChar
|
||||
|
||||
conChar :: Parser Char
|
||||
conChar = satisfy (\c -> isAlphaNum c || c == '_')
|
||||
anchorText :: Parser Text
|
||||
anchorText = Text.pack <$> many (satisfy (\c -> c /= '"' && not (isSpace c)))
|
||||
|
||||
linkRaw :: Lexer
|
||||
linkRaw =
|
||||
|
|
@ -225,7 +221,7 @@ link :: Lexer
|
|||
link = do
|
||||
pos <- getPosition
|
||||
l <- linkRaw
|
||||
-- "unconsume" the last token
|
||||
-- register the position of the last token
|
||||
pos' <- flip incSourceColumn (-1) <$> getPosition
|
||||
pure $ (pos, LinkOpen) : l <> [(pos', LinkClose)]
|
||||
|
||||
|
|
@ -247,16 +243,16 @@ labeledLink = do
|
|||
]
|
||||
|
||||
mathsBracket :: Lexer
|
||||
mathsBracket = delimited (void $ "\\[") (void "\\]") MathsBracketOpen MathsBracketClose
|
||||
mathsBracket = delimited "\\[" "\\]" MathsBracketOpen MathsBracketClose
|
||||
|
||||
mathsParens :: Lexer
|
||||
mathsParens = delimited (void $ "\\(") (void "\\)") MathsParenOpen MathsParenClose
|
||||
mathsParens = delimited "\\(" "\\)" MathsParenOpen MathsParenClose
|
||||
|
||||
birdTrack :: Lexer
|
||||
birdTrack = delimitedMaybe (void ">> ") eol BirdTrack Nothing
|
||||
birdTrack = delimitedNoTrailing ">> " eol BirdTrack
|
||||
|
||||
escape :: Lexer
|
||||
escape = delimitedMaybe (void "\\") eol Escape Nothing
|
||||
escape = delimitedNoTrailing "\\" eol Escape
|
||||
|
||||
quotes :: Lexer
|
||||
quotes = delimitedSymmetric "\"" QuoteOpen QuoteClose
|
||||
|
|
|
|||
17
test/Spec.hs
17
test/Spec.hs
|
|
@ -11,6 +11,7 @@ import Types
|
|||
import Data.String (IsString (..))
|
||||
import Data.Text (Text)
|
||||
import Text.Parsec.Pos
|
||||
import GHC.Stack
|
||||
|
||||
main :: IO ()
|
||||
main = hspec $ do
|
||||
|
|
@ -44,19 +45,16 @@ main = hspec $ do
|
|||
modules :: Expectation
|
||||
modules = do
|
||||
"\"MyModule.Name\""
|
||||
`shouldLexTo` [ (1, 1, Module)
|
||||
, (1, 2, Token "MyModule.Name")
|
||||
`shouldLexTo` [ (1, 2, Module "MyModule.Name")
|
||||
]
|
||||
|
||||
"\"OtherModule.Name#myAnchor\""
|
||||
`shouldLexTo` [ (1, 1, Module)
|
||||
, (1, 2, Token "OtherModule.Name")
|
||||
`shouldLexTo` [ (1, 2, Module "OtherModule.Name")
|
||||
, (1, 18, Anchor "myAnchor")
|
||||
]
|
||||
|
||||
"\"OtherModule.Name\\#myAnchor\""
|
||||
`shouldLexTo` [ (1, 1, Module)
|
||||
, (1, 2, Token "OtherModule.Name")
|
||||
`shouldLexTo` [ (1, 2, Module "OtherModule.Name")
|
||||
, (1, 18, Anchor "myAnchor")
|
||||
]
|
||||
link :: Expectation
|
||||
|
|
@ -106,11 +104,15 @@ maths = do
|
|||
]
|
||||
|
||||
escaping :: Expectation
|
||||
escaping =
|
||||
escaping = do
|
||||
"\\("
|
||||
`shouldLexTo` [ (1, 1, Escape)
|
||||
, (1, 2, Token "(")
|
||||
]
|
||||
"\\(\r\n"
|
||||
`shouldLexTo` [ (1, 1, Escape)
|
||||
, (1, 2, Token "(")
|
||||
]
|
||||
|
||||
unicode :: Expectation
|
||||
unicode =
|
||||
|
|
@ -185,6 +187,7 @@ instance IsString (Doc String) where
|
|||
|
||||
shouldLexTo :: String -> [(Int, Int, Token)] -> Expectation
|
||||
shouldLexTo input expected =
|
||||
withFrozenCallStack $
|
||||
case lexer input of
|
||||
Right tokens -> do
|
||||
let actual = map (\(pos, tok) -> (sourceLine pos, sourceColumn pos, tok)) tokens
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue