forked from elland/haddock2
Improved lexer funcs
This commit is contained in:
parent
f586b90434
commit
e81f1ea4f7
2 changed files with 118 additions and 67 deletions
47
src/Lexer.hs
47
src/Lexer.hs
|
|
@ -6,7 +6,7 @@ module Lexer (
|
||||||
emphasis,
|
emphasis,
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import Control.Monad (mfilter)
|
import Control.Monad (mfilter, void)
|
||||||
import Data.Char (isAlphaNum, isPrint)
|
import Data.Char (isAlphaNum, isPrint)
|
||||||
import Data.Functor (($>))
|
import Data.Functor (($>))
|
||||||
import Data.Text (Text)
|
import Data.Text (Text)
|
||||||
|
|
@ -26,6 +26,7 @@ data Token
|
||||||
| Anchor
|
| Anchor
|
||||||
| AngleOpen
|
| AngleOpen
|
||||||
| AngleClose
|
| AngleClose
|
||||||
|
| BirdTrack
|
||||||
| BoldOpen
|
| BoldOpen
|
||||||
| BoldClose
|
| BoldClose
|
||||||
| BracketOpen
|
| BracketOpen
|
||||||
|
|
@ -54,11 +55,14 @@ lexText = go
|
||||||
Just _ -> pure []
|
Just _ -> pure []
|
||||||
Nothing -> do
|
Nothing -> do
|
||||||
toks <-
|
toks <-
|
||||||
choice
|
choice $
|
||||||
[ newlineToken
|
Parsec.try
|
||||||
|
<$> [ newlineToken
|
||||||
, spaceToken
|
, spaceToken
|
||||||
|
, quotes
|
||||||
, textElement
|
, textElement
|
||||||
, identifier
|
, identifier
|
||||||
|
, birdTrack
|
||||||
, other
|
, other
|
||||||
]
|
]
|
||||||
rest <- go
|
rest <- go
|
||||||
|
|
@ -86,31 +90,40 @@ textElement =
|
||||||
, angles
|
, angles
|
||||||
]
|
]
|
||||||
|
|
||||||
delimited :: String -> String -> Token -> Token -> Parser [LocatedToken]
|
delimitedMaybe :: Parser a -> Parser a -> Token -> Maybe Token -> Parser [LocatedToken]
|
||||||
delimited c1 c2 ot ct = do
|
delimitedMaybe op cl ot ct = do
|
||||||
(_, content) <- match $ between op cl any'
|
|
||||||
pos <- getPosition
|
pos <- getPosition
|
||||||
let openTok :: LocatedToken = (pos, ot)
|
(text, content) <- match $ between op cl any'
|
||||||
closeTok :: LocatedToken = (pos, ct)
|
let openTok :: LocatedToken = (setSourceColumn pos 1, ot)
|
||||||
res :: LocatedToken = (pos, Token content)
|
res :: LocatedToken = (setSourceColumn pos 2, Token content)
|
||||||
|
closeToks :: [LocatedToken] = case ct of
|
||||||
|
Just close -> [(setSourceColumn pos (Text.length text), close)]
|
||||||
|
Nothing -> []
|
||||||
|
|
||||||
pure [openTok, res, closeTok]
|
pure $ [openTok, res] <> closeToks
|
||||||
where
|
where
|
||||||
op = string c1
|
|
||||||
cl = string c2
|
|
||||||
any' = Text.pack <$> manyTill anyChar (lookAhead cl)
|
any' = Text.pack <$> manyTill anyChar (lookAhead cl)
|
||||||
|
|
||||||
delimited' :: String -> Token -> Token -> Parser [LocatedToken]
|
delimited :: Parser a -> Parser a -> Token -> Token -> Parser [LocatedToken]
|
||||||
delimited' s t1 t2 = delimited s s t1 t2
|
delimited a b c d = delimitedMaybe a b c (Just d)
|
||||||
|
|
||||||
|
delimitedSymmetric :: Parser a -> Token -> Token -> Parser [LocatedToken]
|
||||||
|
delimitedSymmetric s t1 t2 = delimited s s t1 t2
|
||||||
|
|
||||||
|
birdTrack :: Lexer
|
||||||
|
birdTrack = delimitedMaybe (void ">> ") (void "\n" <|> Parsec.eof) BirdTrack Nothing
|
||||||
|
|
||||||
|
quotes :: Lexer
|
||||||
|
quotes = delimitedSymmetric "\"" QuoteOpen QuoteClose
|
||||||
|
|
||||||
emphasis :: Lexer
|
emphasis :: Lexer
|
||||||
emphasis = delimited' "/" EmphasisOpen EmphasisClose
|
emphasis = delimitedSymmetric "/" EmphasisOpen EmphasisClose
|
||||||
|
|
||||||
bold :: Lexer
|
bold :: Lexer
|
||||||
bold = delimited' "__" BoldOpen BoldClose
|
bold = delimitedSymmetric "__" BoldOpen BoldClose
|
||||||
|
|
||||||
monospace :: Lexer
|
monospace :: Lexer
|
||||||
monospace = delimited' "@" MonospaceOpen MonospaceClose
|
monospace = delimitedSymmetric "@" MonospaceOpen MonospaceClose
|
||||||
|
|
||||||
parens :: Parser [LocatedToken]
|
parens :: Parser [LocatedToken]
|
||||||
parens = delimited "(" ")" ParenOpen ParenClose
|
parens = delimited "(" ")" ParenOpen ParenClose
|
||||||
|
|
|
||||||
128
test/Spec.hs
128
test/Spec.hs
|
|
@ -12,8 +12,87 @@ import Data.String (IsString (..))
|
||||||
import Data.Text (Text)
|
import Data.Text (Text)
|
||||||
import Text.Parsec.Pos
|
import Text.Parsec.Pos
|
||||||
|
|
||||||
shouldParseTo :: Text -> DocMarkup mod Identifier -> Expectation
|
main :: IO ()
|
||||||
shouldParseTo input ast = parseText input `shouldBe` ast
|
main = hspec $ do
|
||||||
|
describe "Lexer" do
|
||||||
|
describe "minimal" do
|
||||||
|
it "space chars" space
|
||||||
|
it "bare string" someString
|
||||||
|
it "emphasis" emphatic
|
||||||
|
it "monospace" monospace
|
||||||
|
it "bird tracks" birdTracks
|
||||||
|
it "quotes" quotes
|
||||||
|
it "ignores nesting" ignoreNesting
|
||||||
|
|
||||||
|
describe "Parser" do
|
||||||
|
it "Bold" do
|
||||||
|
"__bold__" `shouldParseTo` (DocBold (DocString "bold"))
|
||||||
|
it "Emphasis" do
|
||||||
|
"/emphasis/" `shouldParseTo` (DocEmphasis (DocString "emphasis"))
|
||||||
|
|
||||||
|
------------
|
||||||
|
-- Tests
|
||||||
|
------------
|
||||||
|
|
||||||
|
ignoreNesting :: Expectation
|
||||||
|
ignoreNesting =
|
||||||
|
">/foo/"
|
||||||
|
`shouldLexTo` [ (1, 1, Token ">/foo/")
|
||||||
|
]
|
||||||
|
|
||||||
|
birdTracks :: Expectation
|
||||||
|
birdTracks =
|
||||||
|
">> code"
|
||||||
|
`shouldLexTo` [ (1, 1, BirdTrack)
|
||||||
|
, (1, 2, Token "code")
|
||||||
|
]
|
||||||
|
|
||||||
|
quotes :: Expectation
|
||||||
|
quotes =
|
||||||
|
"\"quoted\""
|
||||||
|
`shouldLexTo` [ (1, 1, QuoteOpen)
|
||||||
|
, (1, 2, Token "quoted")
|
||||||
|
, (1, 8, QuoteClose)
|
||||||
|
]
|
||||||
|
|
||||||
|
space :: Expectation
|
||||||
|
space = do
|
||||||
|
"\n "
|
||||||
|
`shouldLexTo` [ (1, 1, Newline)
|
||||||
|
, (2, 1, Space)
|
||||||
|
]
|
||||||
|
" \n"
|
||||||
|
`shouldLexTo` [ (1, 1, Space)
|
||||||
|
, (1, 2, Newline)
|
||||||
|
]
|
||||||
|
|
||||||
|
monospace :: Expectation
|
||||||
|
monospace =
|
||||||
|
"@mono@"
|
||||||
|
`shouldLexTo` [ (1, 1, MonospaceOpen)
|
||||||
|
, (1, 2, Token "mono")
|
||||||
|
, (1, 6, MonospaceClose)
|
||||||
|
]
|
||||||
|
|
||||||
|
emphatic :: Expectation
|
||||||
|
emphatic =
|
||||||
|
"/emphatic/"
|
||||||
|
`shouldLexTo` [ (1, 1, EmphasisOpen)
|
||||||
|
, (1, 2, Token "emphatic")
|
||||||
|
, (1, 10, EmphasisClose)
|
||||||
|
]
|
||||||
|
|
||||||
|
someString :: Expectation
|
||||||
|
someString =
|
||||||
|
"some string"
|
||||||
|
`shouldLexTo` [ (1, 1, Token "some")
|
||||||
|
, (1, 5, Space)
|
||||||
|
, (1, 6, Token "string")
|
||||||
|
]
|
||||||
|
|
||||||
|
--------------
|
||||||
|
-- Helpers
|
||||||
|
--------------
|
||||||
|
|
||||||
type Doc id = DocMarkup () id
|
type Doc id = DocMarkup () id
|
||||||
|
|
||||||
|
|
@ -28,46 +107,5 @@ shouldLexTo input expected =
|
||||||
actual `shouldBe` expected
|
actual `shouldBe` expected
|
||||||
Left err -> expectationFailure $ "Parse error: " <> show err
|
Left err -> expectationFailure $ "Parse error: " <> show err
|
||||||
|
|
||||||
main :: IO ()
|
shouldParseTo :: Text -> DocMarkup mod Identifier -> Expectation
|
||||||
main = hspec $ do
|
shouldParseTo input ast = parseText input `shouldBe` ast
|
||||||
describe "Lexer" do
|
|
||||||
describe "minimal" do
|
|
||||||
it "bare string" someString
|
|
||||||
it "emphasis" emphatic
|
|
||||||
it "monospace" monospace
|
|
||||||
it "ignores nesting" ignoreNesting
|
|
||||||
describe "Parser" do
|
|
||||||
it "Bold" do
|
|
||||||
"__bold__" `shouldParseTo` (DocBold (DocString "bold"))
|
|
||||||
it "Emphasis" do
|
|
||||||
"/emphasis/" `shouldParseTo` (DocEmphasis (DocString "emphasis"))
|
|
||||||
|
|
||||||
monospace :: Expectation
|
|
||||||
monospace =
|
|
||||||
"@mono@"
|
|
||||||
`shouldLexTo` [ (1, 7, MonospaceOpen)
|
|
||||||
, (1, 7, Token "mono")
|
|
||||||
, (1, 7, MonospaceClose)
|
|
||||||
]
|
|
||||||
|
|
||||||
ignoreNesting :: Expectation
|
|
||||||
ignoreNesting =
|
|
||||||
">/foo/"
|
|
||||||
`shouldLexTo` [ (1, 1, Token ">/foo/")
|
|
||||||
]
|
|
||||||
|
|
||||||
emphatic :: Expectation
|
|
||||||
emphatic =
|
|
||||||
"/emphatic/"
|
|
||||||
`shouldLexTo` [ (1, 11, EmphasisOpen)
|
|
||||||
, (1, 11, Token "emphatic")
|
|
||||||
, (1, 11, EmphasisClose)
|
|
||||||
]
|
|
||||||
|
|
||||||
someString :: Expectation
|
|
||||||
someString =
|
|
||||||
"some string"
|
|
||||||
`shouldLexTo` [ (1, 1, Token "some")
|
|
||||||
, (1, 5, Space)
|
|
||||||
, (1, 6, Token "string")
|
|
||||||
]
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue