feat(lexer): implement numericEntity lexer (#6)

I did this yesterday actually, I just rebased it.

Reviewed-on: elland/haddock2#6
Reviewed-by: elland <igor@elland.me>
Co-authored-by: Léana 江 <leana.jiang+git@icloud.com>
Co-committed-by: Léana 江 <leana.jiang+git@icloud.com>
This commit is contained in:
Primrose 2025-09-27 15:10:11 +00:00 committed by elland
parent 08dc87a307
commit 82eb8435ab
2 changed files with 40 additions and 1 deletions

View file

@ -8,10 +8,11 @@ module Lexer (
where where
import Control.Monad (mfilter, void) import Control.Monad (mfilter, void)
import Data.Char (ord, toLower)
import Data.Functor (($>)) import Data.Functor (($>))
import Data.Text (Text, intercalate) import Data.Text (Text, intercalate)
import Data.Text qualified as Text import Data.Text qualified as Text
import GHC.Unicode (isAlphaNum, isControl, isPrint, isSpace, isUpper) import GHC.Unicode (isAlphaNum, isControl, isDigit, isPrint, isSpace, isUpper)
import ParserMonad (Parser, initialParserState) import ParserMonad (Parser, initialParserState)
import Text.Parsec import Text.Parsec
import Text.Parsec qualified as Parsec import Text.Parsec qualified as Parsec
@ -94,6 +95,7 @@ lexText = go
, labeledLink , labeledLink
, module_ , module_
, anchor , anchor
, numericEntity
, textElement , textElement
, quotes , quotes
, birdTrack , birdTrack
@ -255,6 +257,30 @@ bold = delimitedSymmetric "__" BoldOpen BoldClose
monospace :: Lexer monospace :: Lexer
monospace = delimitedSymmetric "@" MonospaceOpen MonospaceClose monospace = delimitedSymmetric "@" MonospaceOpen MonospaceClose
decimal :: Parser Int
decimal = read . Text.unpack <$> takeWhile1_ isDigit
hexadecimal :: Parser Int
hexadecimal = "x" *> (convert 0 . fmap (normalise . toLower) <$> many1 hexDigit)
where
normalise :: Char -> Int
normalise c
| ord '0' <= n && n <= ord '9' = n - ord '0'
| ord 'A' <= n && n <= ord 'F' = n - ord 'A' + 10
| ord 'a' <= n && n <= ord 'f' = n - ord 'a' + 10
| otherwise = error "unexpected: invalid hex number"
where
n = ord c
convert :: Int -> [Int] -> Int
convert acc [] = acc
convert acc (x : xs) = convert (acc * 16 + x) xs
numericEntity :: Lexer
numericEntity = do
x <- located $ between "&#" ";" (NumericEntity <$> (hexadecimal <|> decimal))
pure [x]
other :: Lexer other :: Lexer
other = do other = do
pos <- getPosition pos <- getPosition

View file

@ -29,6 +29,7 @@ main = hspec $ do
it "bird tracks" birdTracks it "bird tracks" birdTracks
it "module names" modules it "module names" modules
it "quotes" quotes it "quotes" quotes
it "numeric entity" numericEntity
it "ignores nesting" ignoreNesting it "ignores nesting" ignoreNesting
describe "Parser" do describe "Parser" do
@ -152,6 +153,18 @@ space = do
, (1, 2, Newline) , (1, 2, Newline)
] ]
numericEntity :: Expectation
numericEntity = do
"&#65; &#955;"
`shouldLexTo` [ (1, 1, NumericEntity 65)
, (1, 6, Space)
, (1, 7, NumericEntity 955) -- lambda
]
-- Hex
"&#x65;"
`shouldLexTo` [ (1, 1, NumericEntity 101)
]
monospace :: Expectation monospace :: Expectation
monospace = monospace =
"@mono@" "@mono@"