Compare commits

..

1 commit

Author SHA1 Message Date
5073fc0e25
feat(lexer): implement numericEntity lexer 2025-09-26 17:25:34 +08:00
2 changed files with 42 additions and 1 deletions

View file

@ -10,7 +10,8 @@ import Control.Monad (mfilter, void)
import Data.Functor (($>)) import Data.Functor (($>))
import Data.Text (Text, intercalate) import Data.Text (Text, intercalate)
import Data.Text qualified as Text import Data.Text qualified as Text
import GHC.Unicode (isAlphaNum, isControl, isPrint, isSpace, isUpper) import Data.Char (ord, toLower)
import GHC.Unicode (isAlphaNum, isControl, isPrint, isSpace, isUpper, isDigit)
import ParserMonad (Parser, initialParserState) import ParserMonad (Parser, initialParserState)
import Text.Parsec import Text.Parsec
import Text.Parsec qualified as Parsec import Text.Parsec qualified as Parsec
@ -92,6 +93,7 @@ lexText = go
, labeledLink , labeledLink
, module_ , module_
, anchor , anchor
, numericEntity
, textElement , textElement
, quotes , quotes
, birdTrack , birdTrack
@ -254,6 +256,32 @@ bold = delimitedSymmetric "__" BoldOpen BoldClose
monospace :: Lexer monospace :: Lexer
monospace = delimitedSymmetric "@" MonospaceOpen MonospaceClose monospace = delimitedSymmetric "@" MonospaceOpen MonospaceClose
-- | Parse one or more decimal digits into an 'Int'.
--
-- The value is accumulated directly from the matched 'Text' with a strict
-- left fold, which avoids the partial 'read' and the round trip through
-- 'String'. 'isDigit' only accepts the ASCII digits @0@-@9@, so subtracting
-- @ord '0'@ always yields a value in @[0, 9]@.
--
-- No overflow check is performed: inputs beyond the range of 'Int' wrap.
decimal :: Parser Int
decimal = Text.foldl' step 0 <$> takeWhile1_ isDigit
  where
    -- Shift the accumulator one decimal place and add the next digit.
    step :: Int -> Char -> Int
    step acc c = acc * 10 + (ord c - ord '0')
-- | Parse the hexadecimal form of a numeric entity body: a marker (@x@ or
-- @X@) followed by one or more hexadecimal digits, e.g. @x3bb@ or @X3BB@.
--
-- Both the marker and the digits are case-insensitive. No overflow check is
-- performed: inputs beyond the range of 'Int' wrap.
hexadecimal :: Parser Int
hexadecimal = ("x" <|> "X") *> (convert 0 . fmap (digitValue . toLower) <$> many1 hexDigit)
  where
    -- Numeric value of a single, already lower-cased, hex digit.
    -- 'hexDigit' only yields 0-9, a-f and A-F, so after 'toLower' the
    -- final guard cannot be reached.
    digitValue :: Char -> Int
    digitValue c
      | ord '0' <= n && n <= ord '9' = n - ord '0'
      | ord 'a' <= n && n <= ord 'f' = n - ord 'a' + 10
      | otherwise = error "unexpected: invalid hex number"
      where
        n = ord c

    -- Fold the digit values left to right, most significant digit first.
    convert :: Int -> [Int] -> Int
    convert acc [] = acc
    convert acc (d : ds) = convert (acc * 16 + d) ds
-- | Lex a numeric character reference such as @&#955;@ or @&#x3bb;@.
--
-- The number between the @&#@ opener and the @;@ terminator is parsed
-- (hexadecimal form tried first, then decimal), wrapped in 'NumericEntity',
-- passed through 'located', and returned as a singleton list.
numericEntity :: Lexer
numericEntity =
  fmap (: []) . located $
    NumericEntity <$> between "&#" ";" (hexadecimal <|> decimal)
other :: Lexer other :: Lexer
other = do other = do
pos <- getPosition pos <- getPosition

View file

@ -30,6 +30,7 @@ main = hspec $ do
it "bird tracks" birdTracks it "bird tracks" birdTracks
it "module names" modules it "module names" modules
it "quotes" quotes it "quotes" quotes
it "numeric entity" numericEntity
it "ignores nesting" ignoreNesting it "ignores nesting" ignoreNesting
describe "Parser" do describe "Parser" do
@ -152,6 +153,18 @@ space = do
, (1, 2, Newline) , (1, 2, Newline)
] ]
-- | Numeric character references lex to 'NumericEntity' tokens carrying the
-- referenced code point, in both decimal and hexadecimal notation.
numericEntity :: Expectation
numericEntity = decimalCase >> hexCase
  where
    decimalCase, hexCase :: Expectation

    -- Two decimal references separated by a single space.
    decimalCase =
      "&#65; &#955;"
        `shouldLexTo` [ (1, 1, NumericEntity 65)
                      , (1, 6, Space)
                      , (1, 7, NumericEntity 955) -- lambda
                      ]

    -- Hex: 0x65 is 101.
    hexCase =
      "&#x65;"
        `shouldLexTo` [ (1, 1, NumericEntity 101)
                      ]
monospace :: Expectation monospace :: Expectation
monospace = monospace =
"@mono@" "@mono@"