From 8887476626fc9debe388a8382fcf52a9b01ce042 Mon Sep 17 00:00:00 2001
From: Igor Ranieri <igor@elland.me>
Date: Wed, 24 Sep 2025 09:48:01 +0200
Subject: [PATCH 1/2] Embed Anchor content inside token

---
 src/Lexer.hs   | 385 ++++++++++++++++++++++++-------------------------
 test/Spec.hs   | 227 ++++++++++++++---------------
 test/markup.md |  12 +-
 3 files changed, 311 insertions(+), 313 deletions(-)

diff --git a/src/Lexer.hs b/src/Lexer.hs
index c299d41..5b29aa8 100644
--- a/src/Lexer.hs
+++ b/src/Lexer.hs
@@ -1,9 +1,9 @@
 {-# LANGUAGE OverloadedStrings #-}
 
 module Lexer (
-  Token (..),
-  lexer,
-  emphasis,
+    Token (..),
+    lexer,
+    emphasis,
 ) where
 
 import Control.Monad (mfilter, void)
@@ -21,116 +21,117 @@ type LocatedToken = (SourcePos, Token)
 type Lexer = Parser [LocatedToken]
 
 data Level
-  = One
-  | Two
-  | Three
-  | Four
-  | Five
-  | Six
-  deriving (Eq, Show)
+    = One
+    | Two
+    | Three
+    | Four
+    | Five
+    | Six
+    deriving (Eq, Show)
 
 data Token
-  = Token Text
-  | Anchor
-  | BirdTrack
-  | BoldOpen
-  | BoldClose
-  | Escape
-  | EmphasisOpen
-  | EmphasisClose
-  | Header Level
-  | MonospaceOpen
-  | MonospaceClose
-  | Newline
-  | LinkOpen
-  | LinkClose
-  | LabeledLinkOpen
-  | LabeledLinkClose
-  | ParenOpen
-  | ParenClose
-  | BracketOpen
-  | BracketClose
-  | MathsParenOpen
-  | MathsParenClose
-  | MathsBracketOpen
-  | MathsBracketClose
-  | Module
-  | QuoteOpen
-  | QuoteClose
-  | Space
-  | EOF
-  deriving (Eq, Show)
+    = Token Text
+    | Anchor Text
+    | BirdTrack
+    | BoldOpen
+    | BoldClose
+    | Escape
+    | EmphasisOpen
+    | EmphasisClose
+    | Header Level
+    | MonospaceOpen
+    | MonospaceClose
+    | Newline
+    | LinkOpen
+    | LinkClose
+    | LabeledLinkOpen
+    | LabeledLinkClose
+    | ParenOpen
+    | ParenClose
+    | BracketOpen
+    | BracketClose
+    | MathsParenOpen
+    | MathsParenClose
+    | MathsBracketOpen
+    | MathsBracketClose
+    | NumericEntity Int
+    | Module
+    | QuoteOpen
+    | QuoteClose
+    | Space
+    | EOF
+    deriving (Eq, Show)
 
 lexer :: String -> Either ParseError [LocatedToken]
 lexer = Parsec.runParser lexText initialParserState "input" . Text.pack
 
 lexText :: Parser [LocatedToken]
 lexText = go
- where
-  go = do
-    Parsec.optionMaybe Parsec.eof >>= \case
-      Just _ -> pure []
-      Nothing -> do
-        toks <-
-          choice $
-            Parsec.try
-              <$> [ mathsBracket
-                  , mathsParens
-                  , escape -- maths go before escape to avoid mismatch
-                  , headers
-                  , newlineToken
-                  , spaceToken
-                  , link
-                  , labeledLink
-                  , modules
-                  , anchors
-                  , textElement
-                  , quotes
-                  , birdTrack
-                  , other
-                  ]
-        rest <- go
-        pure (toks <> rest)
+  where
+    go = do
+        Parsec.optionMaybe Parsec.eof >>= \case
+            Just _ -> pure []
+            Nothing -> do
+                toks <-
+                    choice $
+                        Parsec.try
+                            <$> [ mathsBracket
+                                , mathsParens
+                                , escape -- maths go before escape to avoid mismatch
+                                , headers
+                                , newlineToken
+                                , spaceToken
+                                , link
+                                , labeledLink
+                                , modules
+                                , anchors
+                                , textElement
+                                , quotes
+                                , birdTrack
+                                , other
+                                ]
+                rest <- go
+                pure (toks <> rest)
 
 -- Tokens
 
 textElement :: Parser [LocatedToken]
 textElement =
-  choice $
-    Parsec.try
-      <$> [ emphasis
-          , bold
-          , monospace
-          ]
+    choice $
+        Parsec.try
+            <$> [ emphasis
+                , bold
+                , monospace
+                ]
 
 headers :: Parser [LocatedToken]
 headers =
-  choice $
-    Parsec.try
-      <$> [ header1
-          , header2
-          , header3
-          , header4
-          , header5
-          , header6
-          ]
+    choice $
+        Parsec.try
+            <$> [ header1
+                , header2
+                , header3
+                , header4
+                , header5
+                , header6
+                ]
 
 delimitedMaybe :: Parser a -> Parser a -> Token -> Maybe Token -> Parser [LocatedToken]
 delimitedMaybe openMark closeMark openToken closeToken = do
-  openPos <- getPosition
-  void openMark
-  tokenPos <- getPosition
-  content <- anyUntil closeMark
-  closePos <- getPosition
-  void closeMark
+    openPos <- getPosition
+    void openMark
+    tokenPos <- getPosition
+    content <- anyUntil closeMark
+    closePos <- getPosition
+    void closeMark
 
-  let openTok :: LocatedToken = (openPos, openToken)
-      res :: LocatedToken = (tokenPos, Token content)
-      closeToks :: [LocatedToken] = case closeToken of
-        Just close -> [(closePos, close)]
-        Nothing -> []
+    let openTok :: LocatedToken = (openPos, openToken)
+        res :: LocatedToken = (tokenPos, Token content)
+        closeToks :: [LocatedToken] = case closeToken of
+            Just close -> [(closePos, close)]
+            Nothing -> []
 
-  pure $ [openTok, res] <> closeToks
+    pure $ [openTok, res] <> closeToks
 
 anyUntil :: Parser a -> Parser Text
 anyUntil p = Text.pack <$> manyTill anyChar (lookAhead p)
@@ -165,99 +166,91 @@ header6 = delimitedMaybe (void $ "====== ") eol (Header Six) Nothing
 -- #anchors#
 anchors :: Lexer
 anchors = do
-  pos <- getPosition
-  void $ try anchor'
-  pos' <- getPosition
-  txt <- anyUntil anchor'
-  pos'' <- getPosition
-  void $ try anchor'
+    pos <- getPosition
+    void $ try anchor'
+    txt <- anyUntil anchor'
+    void $ try anchor'
 
-  pure
-    [ (pos, Anchor)
-    , (pos', Token txt)
-    , (pos'', Anchor)
-    ]
- where
-  anchor' = (string "#" <|> string "\\#")
+    pure [(pos, Anchor txt)]
+  where
+    anchor' = (string "#" <|> string "\\#")
 
 -- "Module.Name"
 -- "Module.Name#anchor"
--- "Module.Name#anchor"
 modules :: Lexer
 modules = do
-  pos <- getPosition
-  void $ char '"'
-  pos' <- getPosition
-  modName <- modId
-  anch <- option [] do
-    pos'' <- getPosition
-    void $ try (string "#" <|> string "\\#")
-    pos''' <- getPosition
-    a <- Text.pack <$> many (satisfy (\c -> c /= '"' && not (isSpace c)))
-    pure [(pos'', Anchor), (pos''', Token a)]
+    pos <- getPosition
+    void $ char '"'
+    pos' <- getPosition
+    modName <- modId
+    anch <- option [] do
+        pos'' <- getPosition
+        void $ try (string "#" <|> string "\\#")
+        a <- Text.pack <$> many (satisfy (\c -> c /= '"' && not (isSpace c)))
+        pure [(pos'', Anchor a)]
 
-  void $ char '"'
-  pure $ [(pos, Module), (pos', Token modName)] <> anch
- where
-  modId = intercalate "." <$> (fmap Text.pack <$> (conId `sepBy1` (char '.')))
+    void $ char '"'
+    pure $ [(pos, Module), (pos', Token modName)] <> anch
+  where
+    modId = intercalate "." <$> (fmap Text.pack <$> (conId `sepBy1` (char '.')))
 
-  conId :: Parser String
-  conId =
-    (:)
-      <$> satisfy (\c -> isAlpha c && isUpper c)
-      <*> many1 conChar
+    conId :: Parser String
+    conId =
+        (:)
+            <$> satisfy (\c -> isAlpha c && isUpper c)
+            <*> many1 conChar
 
-  conChar :: Parser Char
-  conChar = satisfy (\c -> isAlphaNum c || c == '_')
+    conChar :: Parser Char
+    conChar = satisfy (\c -> isAlphaNum c || c == '_')
 
 linkRaw :: Lexer
 linkRaw = do
-  pos1 <- getPosition
-  void $ string "["
-  pos2 <- getPosition
-  text <- anyUntil $ Text.pack <$> string "]"
-  pos3 <- getPosition
-  void $ "]"
-  pos4 <- getPosition
-  void $ "("
-  pos5 <- getPosition
-  link' <- anyUntil $ Text.pack <$> string ")"
-  pos6 <- getPosition
-  void $ ")"
+    pos1 <- getPosition
+    void $ string "["
+    pos2 <- getPosition
+    text <- anyUntil $ Text.pack <$> string "]"
+    pos3 <- getPosition
+    void $ "]"
+    pos4 <- getPosition
+    void $ "("
+    pos5 <- getPosition
+    link' <- anyUntil $ Text.pack <$> string ")"
+    pos6 <- getPosition
+    void $ ")"
 
-  pure $
-    [ (pos1, BracketOpen)
-    , (pos2, Token text)
-    , (pos3, BracketClose)
-    , (pos4, ParenOpen)
-    , (pos5, Token link')
-    , (pos6, ParenClose)
-    ]
+    pure $
+        [ (pos1, BracketOpen)
+        , (pos2, Token text)
+        , (pos3, BracketClose)
+        , (pos4, ParenOpen)
+        , (pos5, Token link')
+        , (pos6, ParenClose)
+        ]
 
 link :: Lexer
 link = do
-  pos <- getPosition
-  l <- linkRaw
-  -- "unconsume" the last token
-  pos' <- flip incSourceColumn (-1) <$> getPosition
-  pure $ (pos, LinkOpen) : l <> [(pos', LinkClose)]
+    pos <- getPosition
+    l <- linkRaw
+    -- "unconsume" the last token
+    pos' <- flip incSourceColumn (-1) <$> getPosition
+    pure $ (pos, LinkOpen) : l <> [(pos', LinkClose)]
 
 labeledLink :: Lexer
 labeledLink = do
-  pos <- getPosition
-  void $ string "<"
-  link' <- linkRaw
-  pos7 <- getPosition
-  label' <- anyUntil $ string ">"
-  pos8 <- getPosition
-  void $ ">"
+    pos <- getPosition
+    void $ string "<"
+    link' <- linkRaw
+    pos7 <- getPosition
+    label' <- anyUntil $ string ">"
+    pos8 <- getPosition
+    void $ ">"
 
-  pure $
-    (pos, LabeledLinkOpen)
-      : link'
-        <> [ (pos7, Token label')
-           , (pos8, LabeledLinkClose)
-           ]
+    pure $
+        (pos, LabeledLinkOpen)
+            : link'
+                <> [ (pos7, Token label')
+                   , (pos8, LabeledLinkClose)
+                   ]
 
 mathsBracket :: Lexer
 mathsBracket = delimited (void $ "\\[") (void "\\]") MathsBracketOpen MathsBracketClose
@@ -285,23 +278,23 @@ monospace = delimitedSymmetric "@" MonospaceOpen MonospaceClose
 
 other :: Lexer
 other = do
-  pos <- getPosition
-  c <- takeWhile1_ isUnicodeAlphaNum
-  pure . pure $ (pos, Token c)
- where
-  isUnicodeAlphaNum c = isPrint c && not (isControl c) && not (isSpace c)
+    pos <- getPosition
+    c <- takeWhile1_ isUnicodeAlphaNum
+    pure . pure $ (pos, Token c)
+  where
+    isUnicodeAlphaNum c = isPrint c && not (isControl c) && not (isSpace c)
 
 spaceToken :: Lexer
 spaceToken = do
-  pos <- getPosition
-  _ <- many1 (char ' ')
-  pure . pure $ (pos, Space)
+    pos <- getPosition
+    _ <- many1 (char ' ')
+    pure . pure $ (pos, Space)
 
 newlineToken :: Lexer
 newlineToken = do
-  pos <- getPosition
-  _ <- newline
-  pure . pure $ (pos, Newline)
+    pos <- getPosition
+    _ <- newline
+    pure . pure $ (pos, Newline)
 
 -------
 -- Helpers
@@ -310,11 +303,11 @@ newlineToken = do
 -- | Like `takeWhile`, but unconditionally take escaped characters.
 takeWhile_ :: (Char -> Bool) -> Parser Text
 takeWhile_ p = scan p_ False
- where
-  p_ escaped c
-    | escaped = Just False
-    | not $ p c = Nothing
-    | otherwise = Just (c == '\\')
+  where
+    p_ escaped c
+        | escaped = Just False
+        | not $ p c = Nothing
+        | otherwise = Just (c == '\\')
 
 -- | Like 'takeWhile1', but unconditionally take escaped characters.
 takeWhile1_ :: (Char -> Bool) -> Parser Text
@@ -324,19 +317,19 @@ takeWhile1_ = mfilter (not . Text.null) . takeWhile_
 function returns true.
 -}
 scan ::
-  -- | scan function
-  (state -> Char -> Maybe state) ->
-  -- | initial state
-  state ->
-  Parser Text
+    -- | scan function
+    (state -> Char -> Maybe state) ->
+    -- | initial state
+    state ->
+    Parser Text
 scan f initState = do
-  parserState@State{stateInput = input, statePos = pos} <- Parsec.getParserState
-  (remaining, finalPos, ct) <- go input initState pos 0
-  let newState = parserState{stateInput = remaining, statePos = finalPos}
-  Parsec.setParserState newState $> Text.take ct input
- where
-  go !input' !st !posAccum !count' = case Text.uncons input' of
-    Nothing -> pure (input', posAccum, count')
-    Just (char', input'') -> case f st char' of
-      Nothing -> pure (input', posAccum, count')
-      Just st' -> go input'' st' (updatePosChar posAccum char') (count' + 1)
+    parserState@State{stateInput = input, statePos = pos} <- Parsec.getParserState
+    (remaining, finalPos, ct) <- go input initState pos 0
+    let newState = parserState{stateInput = remaining, statePos = finalPos}
+    Parsec.setParserState newState $> Text.take ct input
+  where
+    go !input' !st !posAccum !count' = case Text.uncons input' of
+        Nothing -> pure (input', posAccum, count')
+        Just (char', input'') -> case f st char' of
+            Nothing -> pure (input', posAccum, count')
+            Just st' -> go input'' st' (updatePosChar posAccum char') (count' + 1)
diff --git a/test/Spec.hs b/test/Spec.hs
index 0b61570..9f76b8b 100644
--- a/test/Spec.hs
+++ b/test/Spec.hs
@@ -14,28 +14,28 @@ import Text.Parsec.Pos
 
 main :: IO ()
 main = hspec $ do
-  describe "Lexer" do
-    describe "minimal" do
-      it "handles unicode" unicode
-      it "escapes" escaping
-      it "maths" maths
-      it "anchors" anchors
-      it "space chars" space
-      it "bare string" someString
-      it "emphasis" emphatic
-      it "monospace" monospace
-      it "labeled link" labeledLink
-      it "markdown link" link
-      it "bird tracks" birdTracks
-      it "module names" modules
-      it "quotes" quotes
-      it "ignores nesting" ignoreNesting
+    describe "Lexer" do
+        describe "minimal" do
+            it "handles unicode" unicode
+            it "escapes" escaping
+            it "maths" maths
+            it "anchors" anchors
+            it "space chars" space
+            it "bare string" someString
+            it "emphasis" emphatic
+            it "monospace" monospace
+            it "labeled link" labeledLink
+            it "markdown link" link
+            it "bird tracks" birdTracks
+            it "module names" modules
+            it "quotes" quotes
+            it "ignores nesting" ignoreNesting
 
-  describe "Parser" do
-    it "Bold" do
-      "__bold__" `shouldParseTo` (DocBold (DocString "bold"))
-    it "Emphasis" do
-      "/emphasis/" `shouldParseTo` (DocEmphasis (DocString "emphasis"))
+    describe "Parser" do
+        it "Bold" do
+            "__bold__" `shouldParseTo` (DocBold (DocString "bold"))
+        it "Emphasis" do
+            "/emphasis/" `shouldParseTo` (DocEmphasis (DocString "emphasis"))
 
 ------------
 -- Tests
@@ -43,134 +43,131 @@ main = hspec $ do
 
 modules :: Expectation
 modules = do
-  "\"MyModule.Name\""
-    `shouldLexTo` [ (1, 1, Module)
-                  , (1, 2, Token "MyModule.Name")
-                  ]
+    "\"MyModule.Name\""
+        `shouldLexTo` [ (1, 1, Module)
+                      , (1, 2, Token "MyModule.Name")
+                      ]
 
-  "\"OtherModule.Name#myAnchor\""
-    `shouldLexTo` [ (1, 1, Module)
-                  , (1, 2, Token "OtherModule.Name")
-                  , (1, 18, Anchor)
-                  , (1, 19, Token "myAnchor")
-                  ]
+    "\"OtherModule.Name#myAnchor\""
+        `shouldLexTo` [ (1, 1, Module)
+                      , (1, 2, Token "OtherModule.Name")
+                      , (1, 18, Anchor "myAnchor")
+                      ]
 
 link :: Expectation
 link =
-  "[link to](http://some.website)"
-    `shouldLexTo` [ (1, 1, LinkOpen)
-                  , (1, 1, BracketOpen)
-                  , (1, 2, Token "link to")
-                  , (1, 9, BracketClose)
-                  , (1, 10, ParenOpen)
-                  , (1, 11, Token "http://some.website")
-                  , (1, 30, ParenClose)
-                  , (1, 30, LinkClose)
-                  ]
+    "[link to](http://some.website)"
+        `shouldLexTo` [ (1, 1, LinkOpen)
+                      , (1, 1, BracketOpen)
+                      , (1, 2, Token "link to")
+                      , (1, 9, BracketClose)
+                      , (1, 10, ParenOpen)
+                      , (1, 11, Token "http://some.website")
+                      , (1, 30, ParenClose)
+                      , (1, 30, LinkClose)
+                      ]
 
 labeledLink :: Expectation
 labeledLink =
-  "<[link here](http://to.here) label>"
-    `shouldLexTo` [ (1, 1, LabeledLinkOpen)
-                  , (1, 2, BracketOpen)
-                  , (1, 3, Token "link here")
-                  , (1, 12, BracketClose)
-                  , (1, 13, ParenOpen)
-                  , (1, 14, Token "http://to.here")
-                  , (1, 28, ParenClose)
-                  , (1, 29, Token " label")
-                  , (1, 35, LabeledLinkClose)
-                  ]
+    "<[link here](http://to.here) label>"
+        `shouldLexTo` [ (1, 1, LabeledLinkOpen)
+                      , (1, 2, BracketOpen)
+                      , (1, 3, Token "link here")
+                      , (1, 12, BracketClose)
+                      , (1, 13, ParenOpen)
+                      , (1, 14, Token "http://to.here")
+                      , (1, 28, ParenClose)
+                      , (1, 29, Token " label")
+                      , (1, 35, LabeledLinkClose)
+                      ]
 
 anchors :: Expectation
 anchors =
-  "#myAnchor#"
-    `shouldLexTo` [ (1, 1, Anchor)
-                  , (1, 2, Token "myAnchor")
-                  , (1, 10, Anchor)
-                  ]
+    "#myAnchor#"
+        `shouldLexTo` [ (1, 1, Anchor "myAnchor")
+                      ]
 
 maths :: IO ()
 maths = do
-  "\\[some math\\]"
-    `shouldLexTo` [ (1, 1, MathsBracketOpen)
-                  , (1, 3, Token "some math")
-                  , (1, 12, MathsBracketClose)
-                  ]
-  "\\(other maths\\)"
-    `shouldLexTo` [ (1, 1, MathsParenOpen)
-                  , (1, 3, Token "other maths")
-                  , (1, 14, MathsParenClose)
-                  ]
+    "\\[some math\\]"
+        `shouldLexTo` [ (1, 1, MathsBracketOpen)
+                      , (1, 3, Token "some math")
+                      , (1, 12, MathsBracketClose)
+                      ]
+    "\\(other maths\\)"
+        `shouldLexTo` [ (1, 1, MathsParenOpen)
+                      , (1, 3, Token "other maths")
+                      , (1, 14, MathsParenClose)
+                      ]
 
 escaping :: Expectation
 escaping =
-  "\\("
-    `shouldLexTo` [ (1, 1, Escape)
-                  , (1, 2, Token "(")
-                  ]
+    "\\("
+        `shouldLexTo` [ (1, 1, Escape)
+                      , (1, 2, Token "(")
+                      ]
 
 unicode :: Expectation
 unicode =
-  "ドラゴンクエストの冒険者🐉"
-    `shouldLexTo` [ (1, 1, Token "ドラゴンクエストの冒険者🐉")
-                  ]
+    "ドラゴンクエストの冒険者🐉"
+        `shouldLexTo` [ (1, 1, Token "ドラゴンクエストの冒険者🐉")
+                      ]
 
 ignoreNesting :: Expectation
 ignoreNesting =
-  ">/foo/"
-    `shouldLexTo` [ (1, 1, Token ">/foo/")
-                  ]
+    ">/foo/"
+        `shouldLexTo` [ (1, 1, Token ">/foo/")
+                      ]
 
 birdTracks :: Expectation
 birdTracks =
-  ">> code"
-    `shouldLexTo` [ (1, 1, BirdTrack)
-                  , (1, 4, Token "code")
-                  ]
+    ">> code"
+        `shouldLexTo` [ (1, 1, BirdTrack)
+                      , (1, 4, Token "code")
+                      ]
 
 quotes :: Expectation
 quotes =
-  "\"quoted\""
-    `shouldLexTo` [ (1, 1, QuoteOpen)
-                  , (1, 2, Token "quoted")
-                  , (1, 8, QuoteClose)
-                  ]
+    "\"quoted\""
+        `shouldLexTo` [ (1, 1, QuoteOpen)
+                      , (1, 2, Token "quoted")
+                      , (1, 8, QuoteClose)
+                      ]
 
 space :: Expectation
 space = do
-  "\n "
-    `shouldLexTo` [ (1, 1, Newline)
-                  , (2, 1, Space)
-                  ]
-  " \n"
-    `shouldLexTo` [ (1, 1, Space)
-                  , (1, 2, Newline)
-                  ]
+    "\n "
+        `shouldLexTo` [ (1, 1, Newline)
+                      , (2, 1, Space)
+                      ]
+    " \n"
+        `shouldLexTo` [ (1, 1, Space)
+                      , (1, 2, Newline)
+                      ]
 
 monospace :: Expectation
 monospace =
-  "@mono@"
-    `shouldLexTo` [ (1, 1, MonospaceOpen)
-                  , (1, 2, Token "mono")
-                  , (1, 6, MonospaceClose)
-                  ]
+    "@mono@"
+        `shouldLexTo` [ (1, 1, MonospaceOpen)
+                      , (1, 2, Token "mono")
+                      , (1, 6, MonospaceClose)
+                      ]
 
 emphatic :: Expectation
 emphatic =
-  "/emphatic/"
-    `shouldLexTo` [ (1, 1, EmphasisOpen)
-                  , (1, 2, Token "emphatic")
-                  , (1, 10, EmphasisClose)
-                  ]
+    "/emphatic/"
+        `shouldLexTo` [ (1, 1, EmphasisOpen)
+                      , (1, 2, Token "emphatic")
+                      , (1, 10, EmphasisClose)
+                      ]
 
 someString :: Expectation
 someString =
-  "some string"
-    `shouldLexTo` [ (1, 1, Token "some")
-                  , (1, 5, Space)
-                  , (1, 6, Token "string")
-                  ]
+    "some string"
+        `shouldLexTo` [ (1, 1, Token "some")
+                      , (1, 5, Space)
+                      , (1, 6, Token "string")
+                      ]
 
 --------------
 -- Helpers
@@ -179,15 +176,15 @@ someString =
 type Doc id = DocMarkup () id
 
 instance IsString (Doc String) where
-  fromString = DocString
+    fromString = DocString
 
 shouldLexTo :: String -> [(Int, Int, Token)] -> Expectation
 shouldLexTo input expected =
-  case lexer input of
-    Right tokens -> do
-      let actual = map (\(pos, tok) -> (sourceLine pos, sourceColumn pos, tok)) tokens
-      actual `shouldBe` expected
-    Left err -> expectationFailure $ "Parse error: " <> show err
+    case lexer input of
+        Right tokens -> do
+            let actual = map (\(pos, tok) -> (sourceLine pos, sourceColumn pos, tok)) tokens
+            actual `shouldBe` expected
+        Left err -> expectationFailure $ "Parse error: " <> show err
 
 shouldParseTo :: Text -> DocMarkup mod Identifier -> Expectation
 shouldParseTo input ast = parseText input `shouldBe` ast
diff --git a/test/markup.md b/test/markup.md
index befd3ed..187a525 100644
--- a/test/markup.md
+++ b/test/markup.md
@@ -27,8 +27,16 @@ ftp\://example.com
 
 ![alt text](image.png)
 
-\(mathematical expression\)
-\[mathematical expression\]
+\(mathematical 1+3 expression\)
+
+\[mathematical
+ expression
+ accross lines with + addition and such
+\]
+
+&#123
+&#x65
+&#165
 
 @
 code block content

From 7ae868932d5aa11df631a04b86e9293010610cec Mon Sep 17 00:00:00 2001
From: Igor Ranieri <igor@elland.me>
Date: Wed, 24 Sep 2025 10:13:16 +0200
Subject: [PATCH 2/2] Add located combinator; improved anchor matches with
 tests

---
 src/Lexer.hs | 22 +++++++++++++---------
 test/Spec.hs |  5 +++++
 2 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/src/Lexer.hs b/src/Lexer.hs
index 5b29aa8..abced93 100644
--- a/src/Lexer.hs
+++ b/src/Lexer.hs
@@ -175,22 +175,26 @@ anchors = do
   where
     anchor' = (string "#" <|> string "\\#")
 
+located :: Parser a -> Parser (SourcePos, a)
+located p = (,) <$> getPosition <*> p
+
+startPosition :: Parser a -> Parser SourcePos
+startPosition = fmap fst . located
+
 -- "Module.Name"
 -- "Module.Name#anchor"
+-- "Module.Name\#anchor" -- this has been deprecated for 9 years, thanks Ben
 modules :: Lexer
 modules = do
-    pos <- getPosition
-    void $ char '"'
-    pos' <- getPosition
-    modName <- modId
+    startPos <- startPosition $ char '"'
+    (modPos, modName) <- located modId
     anch <- option [] do
-        pos'' <- getPosition
-        void $ try (string "#" <|> string "\\#")
-        a <- Text.pack <$> many (satisfy (\c -> c /= '"' && not (isSpace c)))
-        pure [(pos'', Anchor a)]
+        anchPos <- startPosition (string "#" <|> string' "\\#")
+        txt <- Text.pack <$> many (satisfy (\c -> c /= '"' && not (isSpace c)))
+        pure [(anchPos, Anchor txt)]
 
     void $ char '"'
-    pure $ [(pos, Module), (pos', Token modName)] <> anch
+    pure $ [(startPos, Module), (modPos, Token modName)] <> anch
   where
     modId = intercalate "." <$> (fmap Text.pack <$> (conId `sepBy1` (char '.')))
 
diff --git a/test/Spec.hs b/test/Spec.hs
index 9f76b8b..d202ac2 100644
--- a/test/Spec.hs
+++ b/test/Spec.hs
@@ -54,6 +54,11 @@ modules = do
                       , (1, 18, Anchor "myAnchor")
                       ]
 
+    "\"OtherModule.Name\\#myAnchor\""
+        `shouldLexTo` [ (1, 1, Module)
+                      , (1, 2, Token "OtherModule.Name")
+                      , (1, 18, Anchor "myAnchor")
+                      ]
 link :: Expectation
 link =
     "[link to](http://some.website)"