Compare commits

..

No commits in common. "970b658926e4a683f6c557539ddf750803cabd44" and "c4d59d32369a4b0b3c11fbb3e3c6915a0a362d89" have entirely different histories.

3 changed files with 40 additions and 28 deletions

View file

@ -12,7 +12,7 @@ bold ::= '__' text_no_newline '__'
monospace ::= '@' text_content '@'
link ::= module_link | hyperlink | markdown_link
module_link ::= '"' module_name ( ('#' | '\#') anchor_name )? '"'
module_link ::= '"' module_name ( '#' anchor_name )? '"'
hyperlink ::= '<' url ( ' ' link_text )? '>'
markdown_link ::= '[' link_text '](' ( url | module_link ) ')'

View file

@ -10,7 +10,7 @@ import Control.Monad (mfilter, void)
import Data.Functor (($>))
import Data.Text (Text, intercalate)
import Data.Text qualified as Text
import GHC.Unicode (isAlphaNum, isControl, isPrint, isSpace, isUpper)
import GHC.Unicode (isAlpha, isAlphaNum, isControl, isPrint, isSpace, isUpper)
import ParserMonad (Parser, initialParserState)
import Text.Parsec
import Text.Parsec qualified as Parsec
@ -66,6 +66,9 @@ data Token
located :: Parser a -> Parser (SourcePos, a)
located p = (,) <$> getPosition <*> p
startPosition :: Parser a -> Parser SourcePos
startPosition = fmap fst . located
tokenise :: [Parser a] -> Parser [(SourcePos, a)]
tokenise = sequence . map located
@ -90,8 +93,8 @@ lexText = go
, spaceToken
, link
, labeledLink
, module_
, anchor
, modules
, anchors
, textElement
, quotes
, birdTrack
@ -149,6 +152,9 @@ delimitedSymmetric s t1 t2 = delimited s s t1 t2
eol :: Parser ()
eol = void "\n" <|> void "\r\n" <|> Parsec.eof
anchorHash :: Parser Text
anchorHash = "#" <|> try "\\#"
header1 :: Lexer
header1 = delimitedNoTrailing "= " eol (Header One)
@ -168,10 +174,11 @@ header6 :: Lexer
header6 = delimitedNoTrailing "====== " eol (Header Six)
-- #anchors#
anchor :: Lexer
anchor = do
x <- located $ between "#" "#" (Anchor <$> anyUntil "#")
pure [x]
anchors :: Lexer
anchors =
tokenise
[ between anchorHash anchorHash (Anchor <$> anyUntil anchorHash)
]
moduleNames :: Parser Text
@ -185,19 +192,16 @@ identifierChar = satisfy (\c -> isAlphaNum c || c == '_')
-- "Module.Name"
-- "Module.Name#anchor"
-- "Module.Name\#anchor" -- known as "old anchor". this has been deprecated for 9 years, thanks Ben
module_ :: Lexer
module_ = between (char '"') (char '"') inner
-- "Module.Name\#anchor" -- this has been deprecated for 9 years, thanks Ben
modules :: Lexer
modules = between (char '"') (char '"') inner
where
inner = do
m <- located $ Module <$> moduleNames
module_ <- located $ Module <$> moduleNames
mAnchor <- optionMaybe (located $ anchorHash *> (Anchor <$> anchorText))
pure $ case mAnchor of
Just anc -> [m, anc]
Nothing -> [m]
anchorHash :: Parser Text
anchorHash = "#" <|> try "\\#"
Just anchor -> [module_, anchor]
Nothing -> [module_]
anchorText :: Parser Text
anchorText = Text.pack <$> many (satisfy (\c -> c /= '"' && not (isSpace c)))
@ -223,12 +227,20 @@ link = do
labeledLink :: Lexer
labeledLink = do
open <- located $ LabeledLinkOpen <$ "<"
linkRes <- linkRaw
labelRes <- located $ Token <$> anyUntil ">"
close <- located $ LabeledLinkClose <$ ">"
pos <- getPosition
void $ string "<"
link' <- linkRaw
pos7 <- getPosition
label' <- anyUntil $ string ">"
pos8 <- getPosition
void $ ">"
pure $
open : linkRes <> [ labelRes , close ]
(pos, LabeledLinkOpen)
: link'
<> [ (pos7, Token label')
, (pos8, LabeledLinkClose)
]
mathMultiline :: Lexer
mathMultiline = delimited "\\[" "\\]" MathMultilineOpen MathMultilineClose

View file

@ -19,8 +19,8 @@ main = hspec $ do
describe "minimal" do
it "handles unicode" unicode
it "escapes" escaping
it "maths" math
it "anchors" anchor
it "maths" maths
it "anchors" anchors
it "space chars" space
it "bare string" someString
it "emphasis" emphatic
@ -84,14 +84,14 @@ labeledLink =
, (1, 35, LabeledLinkClose)
]
anchor :: Expectation
anchor =
anchors :: Expectation
anchors =
"#myAnchor#"
`shouldLexTo` [ (1, 1, Anchor "myAnchor")
]
math :: IO ()
math = do
maths :: IO ()
maths = do
"\\[some math\\]"
`shouldLexTo` [ (1, 1, MathMultilineOpen)
, (1, 3, Token "some math")