feat(lexer): implement numericEntity lexer (#6)
I did this yesterday actually, I just rebased it. Reviewed-on: #6 Reviewed-by: elland <igor@elland.me> Co-authored-by: Léana 江 <leana.jiang+git@icloud.com> Co-committed-by: Léana 江 <leana.jiang+git@icloud.com>
This commit is contained in:
parent
08dc87a307
commit
82eb8435ab
2 changed files with 40 additions and 1 deletions
28
src/Lexer.hs
28
src/Lexer.hs
|
|
@ -8,10 +8,11 @@ module Lexer (
|
|||
where
|
||||
|
||||
import Control.Monad (mfilter, void)
|
||||
import Data.Char (ord, toLower)
|
||||
import Data.Functor (($>))
|
||||
import Data.Text (Text, intercalate)
|
||||
import Data.Text qualified as Text
|
||||
import GHC.Unicode (isAlphaNum, isControl, isPrint, isSpace, isUpper)
|
||||
import GHC.Unicode (isAlphaNum, isControl, isDigit, isPrint, isSpace, isUpper)
|
||||
import ParserMonad (Parser, initialParserState)
|
||||
import Text.Parsec
|
||||
import Text.Parsec qualified as Parsec
|
||||
|
|
@ -94,6 +95,7 @@ lexText = go
|
|||
, labeledLink
|
||||
, module_
|
||||
, anchor
|
||||
, numericEntity
|
||||
, textElement
|
||||
, quotes
|
||||
, birdTrack
|
||||
|
|
@ -255,6 +257,30 @@ bold = delimitedSymmetric "__" BoldOpen BoldClose
|
|||
monospace :: Lexer
|
||||
monospace = delimitedSymmetric "@" MonospaceOpen MonospaceClose
|
||||
|
||||
decimal :: Parser Int
|
||||
decimal = read . Text.unpack <$> takeWhile1_ isDigit
|
||||
|
||||
hexadecimal :: Parser Int
|
||||
hexadecimal = "x" *> (convert 0 . fmap (normalise . toLower) <$> many1 hexDigit)
|
||||
where
|
||||
normalise :: Char -> Int
|
||||
normalise c
|
||||
| ord '0' <= n && n <= ord '9' = n - ord '0'
|
||||
| ord 'A' <= n && n <= ord 'F' = n - ord 'A' + 10
|
||||
| ord 'a' <= n && n <= ord 'f' = n - ord 'a' + 10
|
||||
| otherwise = error "unexpected: invalid hex number"
|
||||
where
|
||||
n = ord c
|
||||
|
||||
convert :: Int -> [Int] -> Int
|
||||
convert acc [] = acc
|
||||
convert acc (x : xs) = convert (acc * 16 + x) xs
|
||||
|
||||
numericEntity :: Lexer
|
||||
numericEntity = do
|
||||
x <- located $ between "&#" ";" (NumericEntity <$> (hexadecimal <|> decimal))
|
||||
pure [x]
|
||||
|
||||
other :: Lexer
|
||||
other = do
|
||||
pos <- getPosition
|
||||
|
|
|
|||
13
test/Spec.hs
13
test/Spec.hs
|
|
@ -29,6 +29,7 @@ main = hspec $ do
|
|||
it "bird tracks" birdTracks
|
||||
it "module names" modules
|
||||
it "quotes" quotes
|
||||
it "numeric entity" numericEntity
|
||||
it "ignores nesting" ignoreNesting
|
||||
|
||||
describe "Parser" do
|
||||
|
|
@ -152,6 +153,18 @@ space = do
|
|||
, (1, 2, Newline)
|
||||
]
|
||||
|
||||
numericEntity :: Expectation
|
||||
numericEntity = do
|
||||
"A λ"
|
||||
`shouldLexTo` [ (1, 1, NumericEntity 65)
|
||||
, (1, 6, Space)
|
||||
, (1, 7, NumericEntity 955) -- lambda
|
||||
]
|
||||
-- Hex
|
||||
"e"
|
||||
`shouldLexTo` [ (1, 1, NumericEntity 101)
|
||||
]
|
||||
|
||||
monospace :: Expectation
|
||||
monospace =
|
||||
"@mono@"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue