Compare commits

..

No commits in common. "970b658926e4a683f6c557539ddf750803cabd44" and "c4d59d32369a4b0b3c11fbb3e3c6915a0a362d89" have entirely different histories.

3 changed files with 40 additions and 28 deletions

View file

@ -12,7 +12,7 @@ bold ::= '__' text_no_newline '__'
monospace ::= '@' text_content '@' monospace ::= '@' text_content '@'
link ::= module_link | hyperlink | markdown_link link ::= module_link | hyperlink | markdown_link
module_link ::= '"' module_name ( ('#' | '\#') anchor_name )? '"' module_link ::= '"' module_name ( '#' anchor_name )? '"'
hyperlink ::= '<' url ( ' ' link_text )? '>' hyperlink ::= '<' url ( ' ' link_text )? '>'
markdown_link ::= '[' link_text '](' ( url | module_link ) ')' markdown_link ::= '[' link_text '](' ( url | module_link ) ')'

View file

@ -10,7 +10,7 @@ import Control.Monad (mfilter, void)
import Data.Functor (($>)) import Data.Functor (($>))
import Data.Text (Text, intercalate) import Data.Text (Text, intercalate)
import Data.Text qualified as Text import Data.Text qualified as Text
import GHC.Unicode (isAlphaNum, isControl, isPrint, isSpace, isUpper) import GHC.Unicode (isAlpha, isAlphaNum, isControl, isPrint, isSpace, isUpper)
import ParserMonad (Parser, initialParserState) import ParserMonad (Parser, initialParserState)
import Text.Parsec import Text.Parsec
import Text.Parsec qualified as Parsec import Text.Parsec qualified as Parsec
@ -66,6 +66,9 @@ data Token
located :: Parser a -> Parser (SourcePos, a) located :: Parser a -> Parser (SourcePos, a)
located p = (,) <$> getPosition <*> p located p = (,) <$> getPosition <*> p
startPosition :: Parser a -> Parser SourcePos
startPosition = fmap fst . located
tokenise :: [Parser a] -> Parser [(SourcePos, a)] tokenise :: [Parser a] -> Parser [(SourcePos, a)]
tokenise = sequence . map located tokenise = sequence . map located
@ -90,8 +93,8 @@ lexText = go
, spaceToken , spaceToken
, link , link
, labeledLink , labeledLink
, module_ , modules
, anchor , anchors
, textElement , textElement
, quotes , quotes
, birdTrack , birdTrack
@ -149,6 +152,9 @@ delimitedSymmetric s t1 t2 = delimited s s t1 t2
eol :: Parser () eol :: Parser ()
eol = void "\n" <|> void "\r\n" <|> Parsec.eof eol = void "\n" <|> void "\r\n" <|> Parsec.eof
anchorHash :: Parser Text
anchorHash = "#" <|> try "\\#"
header1 :: Lexer header1 :: Lexer
header1 = delimitedNoTrailing "= " eol (Header One) header1 = delimitedNoTrailing "= " eol (Header One)
@ -168,10 +174,11 @@ header6 :: Lexer
header6 = delimitedNoTrailing "====== " eol (Header Six) header6 = delimitedNoTrailing "====== " eol (Header Six)
-- #anchors# -- #anchors#
anchor :: Lexer anchors :: Lexer
anchor = do anchors =
x <- located $ between "#" "#" (Anchor <$> anyUntil "#") tokenise
pure [x] [ between anchorHash anchorHash (Anchor <$> anyUntil anchorHash)
]
moduleNames :: Parser Text moduleNames :: Parser Text
@ -185,19 +192,16 @@ identifierChar = satisfy (\c -> isAlphaNum c || c == '_')
-- "Module.Name" -- "Module.Name"
-- "Module.Name#anchor" -- "Module.Name#anchor"
-- "Module.Name\#anchor" -- known as "old anchor". this has been deprecated for 9 years, thanks Ben -- "Module.Name\#anchor" -- this has been deprecated for 9 years, thanks Ben
module_ :: Lexer modules :: Lexer
module_ = between (char '"') (char '"') inner modules = between (char '"') (char '"') inner
where where
inner = do inner = do
m <- located $ Module <$> moduleNames module_ <- located $ Module <$> moduleNames
mAnchor <- optionMaybe (located $ anchorHash *> (Anchor <$> anchorText)) mAnchor <- optionMaybe (located $ anchorHash *> (Anchor <$> anchorText))
pure $ case mAnchor of pure $ case mAnchor of
Just anc -> [m, anc] Just anchor -> [module_, anchor]
Nothing -> [m] Nothing -> [module_]
anchorHash :: Parser Text
anchorHash = "#" <|> try "\\#"
anchorText :: Parser Text anchorText :: Parser Text
anchorText = Text.pack <$> many (satisfy (\c -> c /= '"' && not (isSpace c))) anchorText = Text.pack <$> many (satisfy (\c -> c /= '"' && not (isSpace c)))
@ -223,12 +227,20 @@ link = do
labeledLink :: Lexer labeledLink :: Lexer
labeledLink = do labeledLink = do
open <- located $ LabeledLinkOpen <$ "<" pos <- getPosition
linkRes <- linkRaw void $ string "<"
labelRes <- located $ Token <$> anyUntil ">" link' <- linkRaw
close <- located $ LabeledLinkClose <$ ">" pos7 <- getPosition
label' <- anyUntil $ string ">"
pos8 <- getPosition
void $ ">"
pure $ pure $
open : linkRes <> [ labelRes , close ] (pos, LabeledLinkOpen)
: link'
<> [ (pos7, Token label')
, (pos8, LabeledLinkClose)
]
mathMultiline :: Lexer mathMultiline :: Lexer
mathMultiline = delimited "\\[" "\\]" MathMultilineOpen MathMultilineClose mathMultiline = delimited "\\[" "\\]" MathMultilineOpen MathMultilineClose

View file

@ -19,8 +19,8 @@ main = hspec $ do
describe "minimal" do describe "minimal" do
it "handles unicode" unicode it "handles unicode" unicode
it "escapes" escaping it "escapes" escaping
it "maths" math it "maths" maths
it "anchors" anchor it "anchors" anchors
it "space chars" space it "space chars" space
it "bare string" someString it "bare string" someString
it "emphasis" emphatic it "emphasis" emphatic
@ -84,14 +84,14 @@ labeledLink =
, (1, 35, LabeledLinkClose) , (1, 35, LabeledLinkClose)
] ]
anchor :: Expectation anchors :: Expectation
anchor = anchors =
"#myAnchor#" "#myAnchor#"
`shouldLexTo` [ (1, 1, Anchor "myAnchor") `shouldLexTo` [ (1, 1, Anchor "myAnchor")
] ]
math :: IO () maths :: IO ()
math = do maths = do
"\\[some math\\]" "\\[some math\\]"
`shouldLexTo` [ (1, 1, MathMultilineOpen) `shouldLexTo` [ (1, 1, MathMultilineOpen)
, (1, 3, Token "some math") , (1, 3, Token "some math")