Compare commits

..

4 commits

3 changed files with 28 additions and 40 deletions

View file

@ -12,7 +12,7 @@ bold ::= '__' text_no_newline '__'
monospace ::= '@' text_content '@'
link ::= module_link | hyperlink | markdown_link
module_link ::= '"' module_name ( '#' anchor_name )? '"'
module_link ::= '"' module_name ( ('#' | '\#') anchor_name )? '"'
hyperlink ::= '<' url ( ' ' link_text )? '>'
markdown_link ::= '[' link_text '](' ( url | module_link ) ')'

View file

@ -10,7 +10,7 @@ import Control.Monad (mfilter, void)
import Data.Functor (($>))
import Data.Text (Text, intercalate)
import Data.Text qualified as Text
import GHC.Unicode (isAlpha, isAlphaNum, isControl, isPrint, isSpace, isUpper)
import GHC.Unicode (isAlphaNum, isControl, isPrint, isSpace, isUpper)
import ParserMonad (Parser, initialParserState)
import Text.Parsec
import Text.Parsec qualified as Parsec
@ -66,9 +66,6 @@ data Token
located :: Parser a -> Parser (SourcePos, a)
located p = (,) <$> getPosition <*> p
startPosition :: Parser a -> Parser SourcePos
startPosition = fmap fst . located
tokenise :: [Parser a] -> Parser [(SourcePos, a)]
tokenise = sequence . map located
@ -93,8 +90,8 @@ lexText = go
, spaceToken
, link
, labeledLink
, modules
, anchors
, module_
, anchor
, textElement
, quotes
, birdTrack
@ -152,9 +149,6 @@ delimitedSymmetric s t1 t2 = delimited s s t1 t2
eol :: Parser ()
eol = void "\n" <|> void "\r\n" <|> Parsec.eof
anchorHash :: Parser Text
anchorHash = "#" <|> try "\\#"
header1 :: Lexer
header1 = delimitedNoTrailing "= " eol (Header One)
@ -174,11 +168,10 @@ header6 :: Lexer
header6 = delimitedNoTrailing "====== " eol (Header Six)
-- #anchors#
anchors :: Lexer
anchors =
tokenise
[ between anchorHash anchorHash (Anchor <$> anyUntil anchorHash)
]
anchor :: Lexer
anchor = do
x <- located $ between "#" "#" (Anchor <$> anyUntil "#")
pure [x]
moduleNames :: Parser Text
@ -192,16 +185,19 @@ identifierChar = satisfy (\c -> isAlphaNum c || c == '_')
-- "Module.Name"
-- "Module.Name#anchor"
-- "Module.Name\#anchor" -- this has been deprecated for 9 years, thanks Ben
modules :: Lexer
modules = between (char '"') (char '"') inner
-- "Module.Name\#anchor" -- known as "old anchor". this has been deprecated for 9 years, thanks Ben
module_ :: Lexer
module_ = between (char '"') (char '"') inner
where
inner = do
module_ <- located $ Module <$> moduleNames
m <- located $ Module <$> moduleNames
mAnchor <- optionMaybe (located $ anchorHash *> (Anchor <$> anchorText))
pure $ case mAnchor of
Just anchor -> [module_, anchor]
Nothing -> [module_]
Just anc -> [m, anc]
Nothing -> [m]
anchorHash :: Parser Text
anchorHash = "#" <|> try "\\#"
anchorText :: Parser Text
anchorText = Text.pack <$> many (satisfy (\c -> c /= '"' && not (isSpace c)))
@ -227,20 +223,12 @@ link = do
labeledLink :: Lexer
labeledLink = do
pos <- getPosition
void $ string "<"
link' <- linkRaw
pos7 <- getPosition
label' <- anyUntil $ string ">"
pos8 <- getPosition
void $ ">"
open <- located $ LabeledLinkOpen <$ "<"
linkRes <- linkRaw
labelRes <- located $ Token <$> anyUntil ">"
close <- located $ LabeledLinkClose <$ ">"
pure $
(pos, LabeledLinkOpen)
: link'
<> [ (pos7, Token label')
, (pos8, LabeledLinkClose)
]
open : linkRes <> [ labelRes , close ]
mathMultiline :: Lexer
mathMultiline = delimited "\\[" "\\]" MathMultilineOpen MathMultilineClose

View file

@ -19,8 +19,8 @@ main = hspec $ do
describe "minimal" do
it "handles unicode" unicode
it "escapes" escaping
it "maths" maths
it "anchors" anchors
it "maths" math
it "anchors" anchor
it "space chars" space
it "bare string" someString
it "emphasis" emphatic
@ -84,14 +84,14 @@ labeledLink =
, (1, 35, LabeledLinkClose)
]
anchors :: Expectation
anchors =
anchor :: Expectation
anchor =
"#myAnchor#"
`shouldLexTo` [ (1, 1, Anchor "myAnchor")
]
maths :: IO ()
maths = do
math :: IO ()
math = do
"\\[some math\\]"
`shouldLexTo` [ (1, 1, MathMultilineOpen)
, (1, 3, Token "some math")