diff --git a/Grammar.ebnf b/Grammar.ebnf index 2404a67..bff331d 100644 --- a/Grammar.ebnf +++ b/Grammar.ebnf @@ -12,7 +12,7 @@ bold ::= '__' text_no_newline '__' monospace ::= '@' text_content '@' link ::= module_link | hyperlink | markdown_link -module_link ::= '"' module_name ( ('#' | '\#') anchor_name )? '"' +module_link ::= '"' module_name ( '#' anchor_name )? '"' hyperlink ::= '<' url ( ' ' link_text )? '>' markdown_link ::= '[' link_text '](' ( url | module_link ) ')' diff --git a/src/Lexer.hs b/src/Lexer.hs index 426a7ff..04fa84f 100644 --- a/src/Lexer.hs +++ b/src/Lexer.hs @@ -10,7 +10,7 @@ import Control.Monad (mfilter, void) import Data.Functor (($>)) import Data.Text (Text, intercalate) import Data.Text qualified as Text -import GHC.Unicode (isAlphaNum, isControl, isPrint, isSpace, isUpper) +import GHC.Unicode (isAlpha, isAlphaNum, isControl, isPrint, isSpace, isUpper) import ParserMonad (Parser, initialParserState) import Text.Parsec import Text.Parsec qualified as Parsec @@ -66,6 +66,9 @@ data Token located :: Parser a -> Parser (SourcePos, a) located p = (,) <$> getPosition <*> p +startPosition :: Parser a -> Parser SourcePos +startPosition = fmap fst . located + tokenise :: [Parser a] -> Parser [(SourcePos, a)] tokenise = sequence . map located @@ -90,8 +93,8 @@ lexText = go , spaceToken , link , labeledLink - , module_ - , anchor + , modules + , anchors , textElement , quotes , birdTrack @@ -149,6 +152,9 @@ delimitedSymmetric s t1 t2 = delimited s s t1 t2 eol :: Parser () eol = void "\n" <|> void "\r\n" <|> Parsec.eof +anchorHash :: Parser Text +anchorHash = "#" <|> try "\\#" + header1 :: Lexer header1 = delimitedNoTrailing "= " eol (Header One) @@ -168,10 +174,11 @@ header6 :: Lexer header6 = delimitedNoTrailing "====== " eol (Header Six) -- #anchors# -anchor :: Lexer -anchor = do - x <- located $ between "#" "#" (Anchor <$> anyUntil "#") - pure [x] +anchors :: Lexer +anchors = + tokenise + [ between anchorHash anchorHash (Anchor <$> anyUntil anchorHash) + ] moduleNames :: Parser Text @@ -185,19 +192,16 @@ identifierChar = satisfy (\c -> isAlphaNum c || c == '_') -- "Module.Name" -- "Module.Name#anchor" --- "Module.Name\#anchor" -- known as "old anchor". this has been deprecated for 9 years, thanks Ben -module_ :: Lexer -module_ = between (char '"') (char '"') inner +-- "Module.Name\#anchor" -- this has been deprecated for 9 years, thanks Ben +modules :: Lexer +modules = between (char '"') (char '"') inner where inner = do - m <- located $ Module <$> moduleNames + module_ <- located $ Module <$> moduleNames mAnchor <- optionMaybe (located $ anchorHash *> (Anchor <$> anchorText)) pure $ case mAnchor of - Just anc -> [m, anc] - Nothing -> [m] - - anchorHash :: Parser Text - anchorHash = "#" <|> try "\\#" + Just anchor -> [module_, anchor] + Nothing -> [module_] anchorText :: Parser Text anchorText = Text.pack <$> many (satisfy (\c -> c /= '"' && not (isSpace c))) @@ -223,12 +227,20 @@ link = do labeledLink :: Lexer labeledLink = do - open <- located $ LabeledLinkOpen <$ "<" - linkRes <- linkRaw - labelRes <- located $ Token <$> anyUntil ">" - close <- located $ LabeledLinkClose <$ ">" + pos <- getPosition + void $ string "<" + link' <- linkRaw + pos7 <- getPosition + label' <- anyUntil $ string ">" + pos8 <- getPosition + void $ ">" + pure $ - open : linkRes <> [ labelRes , close ] + (pos, LabeledLinkOpen) + : link' + <> [ (pos7, Token label') + , (pos8, LabeledLinkClose) + ] mathMultiline :: Lexer mathMultiline = delimited "\\[" "\\]" MathMultilineOpen MathMultilineClose diff --git a/test/Spec.hs b/test/Spec.hs index 745aefa..7ddbcff 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -19,8 +19,8 @@ main = hspec $ do describe "minimal" do it "handles unicode" unicode it "escapes" escaping - it "maths" math - it "anchors" anchor + it "maths" maths + it "anchors" anchors it "space chars" space it "bare string" someString it "emphasis" emphatic @@ -84,14 +84,14 @@ labeledLink = , (1, 35, LabeledLinkClose) ] -anchor :: Expectation -anchor = +anchors :: Expectation +anchors = "#myAnchor#" `shouldLexTo` [ (1, 1, Anchor "myAnchor") ] -math :: IO () -math = do +maths :: IO () +maths = do "\\[some math\\]" `shouldLexTo` [ (1, 1, MathMultilineOpen) , (1, 3, Token "some math")