feat(lexer): implement numericEntity lexer (#6)

I did this yesterday actually, I just rebased it.

Reviewed-on: #6
Reviewed-by: elland <igor@elland.me>
Co-authored-by: Léana 江 <leana.jiang+git@icloud.com>
Co-committed-by: Léana 江 <leana.jiang+git@icloud.com>
This commit is contained in:
Primrose 2025-09-27 15:10:11 +00:00 committed by elland
parent 08dc87a307
commit 82eb8435ab
2 changed files with 40 additions and 1 deletions

View file

@ -8,10 +8,11 @@ module Lexer (
where
import Control.Monad (mfilter, void)
import Data.Char (ord, toLower)
import Data.Functor (($>))
import Data.Text (Text, intercalate)
import Data.Text qualified as Text
import GHC.Unicode (isAlphaNum, isControl, isPrint, isSpace, isUpper)
import GHC.Unicode (isAlphaNum, isControl, isDigit, isPrint, isSpace, isUpper)
import ParserMonad (Parser, initialParserState)
import Text.Parsec
import Text.Parsec qualified as Parsec
@ -94,6 +95,7 @@ lexText = go
, labeledLink
, module_
, anchor
, numericEntity
, textElement
, quotes
, birdTrack
@ -255,6 +257,30 @@ bold = delimitedSymmetric "__" BoldOpen BoldClose
monospace :: Lexer
monospace = delimitedSymmetric "@" MonospaceOpen MonospaceClose
decimal :: Parser Int
decimal = read . Text.unpack <$> takeWhile1_ isDigit
hexadecimal :: Parser Int
hexadecimal = "x" *> (convert 0 . fmap (normalise . toLower) <$> many1 hexDigit)
where
normalise :: Char -> Int
normalise c
| ord '0' <= n && n <= ord '9' = n - ord '0'
| ord 'A' <= n && n <= ord 'F' = n - ord 'A' + 10
| ord 'a' <= n && n <= ord 'f' = n - ord 'a' + 10
| otherwise = error "unexpected: invalid hex number"
where
n = ord c
convert :: Int -> [Int] -> Int
convert acc [] = acc
convert acc (x : xs) = convert (acc * 16 + x) xs
numericEntity :: Lexer
numericEntity = do
x <- located $ between "&#" ";" (NumericEntity <$> (hexadecimal <|> decimal))
pure [x]
other :: Lexer
other = do
pos <- getPosition