Compare commits

...

1 commit

Author SHA1 Message Date
5073fc0e25
feat(lexer): implement numericEntity lexer 2025-09-26 17:25:34 +08:00
2 changed files with 42 additions and 1 deletions

View file

@ -10,7 +10,8 @@ import Control.Monad (mfilter, void)
import Data.Functor (($>))
import Data.Text (Text, intercalate)
import Data.Text qualified as Text
import GHC.Unicode (isAlphaNum, isControl, isPrint, isSpace, isUpper)
import Data.Char (ord, toLower)
import GHC.Unicode (isAlphaNum, isControl, isPrint, isSpace, isUpper, isDigit)
import ParserMonad (Parser, initialParserState)
import Text.Parsec
import Text.Parsec qualified as Parsec
@ -92,6 +93,7 @@ lexText = go
, labeledLink
, module_
, anchor
, numericEntity
, textElement
, quotes
, birdTrack
@ -254,6 +256,32 @@ bold = delimitedSymmetric "__" BoldOpen BoldClose
monospace :: Lexer
monospace = delimitedSymmetric "@" MonospaceOpen MonospaceClose
decimal :: Parser Int
decimal = read . Text.unpack <$> takeWhile1_ isDigit
hexadecimal :: Parser Int
hexadecimal = "x" *> (convert 0 . fmap (normalise . toLower) <$> many1 hexDigit)
where
normalise :: Char -> Int
normalise c
| ord '0' <= n && n <= ord '9' = n - ord '0'
| ord 'A' <= n && n <= ord 'F' = n - ord 'A' + 10
| ord 'a' <= n && n <= ord 'f' = n - ord 'a' + 10
| otherwise = error "unexpected: invalid hex number"
where
n = ord c
convert :: Int -> [Int] -> Int
convert acc [] = acc
convert acc (x : xs) = convert (acc * 16 + x) xs
numericEntity :: Lexer
numericEntity = do
x <- located $ between "&#" ";"
( NumericEntity <$> (hexadecimal <|> decimal)
)
pure [x]
other :: Lexer
other = do
pos <- getPosition

View file

@ -30,6 +30,7 @@ main = hspec $ do
it "bird tracks" birdTracks
it "module names" modules
it "quotes" quotes
it "numeric entity" numericEntity
it "ignores nesting" ignoreNesting
describe "Parser" do
@ -152,6 +153,18 @@ space = do
, (1, 2, Newline)
]
numericEntity :: Expectation
numericEntity = do
"&#65; &#955;"
`shouldLexTo` [ (1, 1, NumericEntity 65)
, (1, 6, Space)
, (1, 7, NumericEntity 955) -- lambda
]
-- Hex
"&#x65;"
`shouldLexTo` [ (1, 1, NumericEntity 101)
]
monospace :: Expectation
monospace =
"@mono@"