From 72986869976a912b2e6bda6994f39afcae9211cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9ana=20=E6=B1=9F?= Date: Fri, 26 Sep 2025 16:51:18 +0800 Subject: [PATCH] feat(lexer): implement numericEntity lexer --- src/Lexer.hs | 28 +++++++++++++++++++++++++++- test/Spec.hs | 13 +++++++++++++ 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/src/Lexer.hs b/src/Lexer.hs index 4a85fb5..47c4e1c 100644 --- a/src/Lexer.hs +++ b/src/Lexer.hs @@ -8,10 +8,11 @@ module Lexer ( where import Control.Monad (mfilter, void) +import Data.Char (ord, toLower) import Data.Functor (($>)) import Data.Text (Text, intercalate) import Data.Text qualified as Text -import GHC.Unicode (isAlphaNum, isControl, isPrint, isSpace, isUpper) +import GHC.Unicode (isAlphaNum, isControl, isDigit, isPrint, isSpace, isUpper) import ParserMonad (Parser, initialParserState) import Text.Parsec import Text.Parsec qualified as Parsec @@ -94,6 +95,7 @@ lexText = go , labeledLink , module_ , anchor + , numericEntity , textElement , quotes , birdTrack @@ -255,6 +257,30 @@ bold = delimitedSymmetric "__" BoldOpen BoldClose monospace :: Lexer monospace = delimitedSymmetric "@" MonospaceOpen MonospaceClose +decimal :: Parser Int +decimal = read . Text.unpack <$> takeWhile1_ isDigit + +hexadecimal :: Parser Int +hexadecimal = "x" *> (convert 0 . fmap (normalise . toLower) <$> many1 hexDigit) + where + normalise :: Char -> Int + normalise c + | ord '0' <= n && n <= ord '9' = n - ord '0' + | ord 'A' <= n && n <= ord 'F' = n - ord 'A' + 10 + | ord 'a' <= n && n <= ord 'f' = n - ord 'a' + 10 + | otherwise = error "unexpected: invalid hex number" + where + n = ord c + + convert :: Int -> [Int] -> Int + convert acc [] = acc + convert acc (x : xs) = convert (acc * 16 + x) xs + +numericEntity :: Lexer +numericEntity = do + x <- located $ between "&#" ";" (NumericEntity <$> (hexadecimal <|> decimal)) + pure [x] + other :: Lexer other = do pos <- getPosition diff --git a/test/Spec.hs b/test/Spec.hs index 7258a2d..03e1b81 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -30,6 +30,7 @@ main = hspec $ do it "bird tracks" birdTracks it "module names" modules it "quotes" quotes + it "numeric entity" numericEntity it "ignores nesting" ignoreNesting describe "Parser" do @@ -152,6 +153,18 @@ space = do , (1, 2, Newline) ] +numericEntity :: Expectation +numericEntity = do + "A λ" + `shouldLexTo` [ (1, 1, NumericEntity 65) + , (1, 6, Space) + , (1, 7, NumericEntity 955) -- lambda + ] + -- Hex + "e" + `shouldLexTo` [ (1, 1, NumericEntity 101) + ] + monospace :: Expectation monospace = "@mono@"