forked from elland/haddock2
Adjusted others, labeled link
This commit is contained in:
parent
4ef8d2c28c
commit
7d4cab4857
2 changed files with 106 additions and 41 deletions
129
src/Lexer.hs
129
src/Lexer.hs
|
|
@ -7,12 +7,11 @@ module Lexer (
|
|||
) where
|
||||
|
||||
import Control.Monad (mfilter, void)
|
||||
import Data.Char (isAlphaNum, isPrint)
|
||||
import Data.Functor (($>))
|
||||
import Data.Text (Text)
|
||||
import Data.Text qualified as Text
|
||||
import Debug.Trace (traceM)
|
||||
import GHC.Stack (HasCallStack)
|
||||
import GHC.Unicode (isControl, isPrint, isSpace)
|
||||
import ParserMonad (Parser, initialParserState)
|
||||
import Text.Parsec
|
||||
import Text.Parsec qualified as Parsec
|
||||
|
|
@ -22,20 +21,34 @@ type LocatedToken = (SourcePos, Token)
|
|||
|
||||
type Lexer = Parser [LocatedToken]
|
||||
|
||||
data Level
|
||||
= One
|
||||
| Two
|
||||
| Three
|
||||
| Four
|
||||
| Five
|
||||
| Six
|
||||
deriving (Eq, Show)
|
||||
|
||||
data Token
|
||||
= Token Text
|
||||
| Anchor
|
||||
| AngleOpen
|
||||
| AngleClose
|
||||
| BirdTrack
|
||||
| BoldOpen
|
||||
| BoldClose
|
||||
| Escape
|
||||
| EmphasisOpen
|
||||
| EmphasisClose
|
||||
| Header Level
|
||||
| MonospaceOpen
|
||||
| MonospaceClose
|
||||
| Newline
|
||||
| Escape
|
||||
| LabeledLinkOpen
|
||||
| LabeledLinkClose
|
||||
| ParenOpen
|
||||
| ParenClose
|
||||
| BracketOpen
|
||||
| BracketClose
|
||||
| MathsParenOpen
|
||||
| MathsParenClose
|
||||
| MathsBracketOpen
|
||||
|
|
@ -62,25 +75,18 @@ lexText = go
|
|||
<$> [ mathsBracket
|
||||
, mathsParens
|
||||
, escape -- maths go before escape to avoid mismatch
|
||||
, headers
|
||||
, newlineToken
|
||||
, spaceToken
|
||||
, labeledLink
|
||||
, quotes
|
||||
, textElement
|
||||
, identifier
|
||||
, birdTrack
|
||||
, other
|
||||
]
|
||||
rest <- go
|
||||
pure (toks <> rest)
|
||||
|
||||
match :: Parser a -> Parser (Text, a)
|
||||
match p = do
|
||||
input <- getInput
|
||||
result <- p
|
||||
input' <- getInput
|
||||
let !consumed = Text.take (Text.length input - Text.length input') input
|
||||
pure (consumed, result)
|
||||
|
||||
-- Tokens
|
||||
|
||||
textElement :: Parser [LocatedToken]
|
||||
|
|
@ -90,26 +96,39 @@ textElement =
|
|||
<$> [ emphasis
|
||||
, bold
|
||||
, monospace
|
||||
, angles
|
||||
]
|
||||
|
||||
headers :: Parser [LocatedToken]
|
||||
headers =
|
||||
choice $
|
||||
Parsec.try
|
||||
<$> [ header1
|
||||
, header2
|
||||
, header3
|
||||
, header4
|
||||
, header5
|
||||
, header6
|
||||
]
|
||||
|
||||
delimitedMaybe :: Parser a -> Parser a -> Token -> Maybe Token -> Parser [LocatedToken]
|
||||
delimitedMaybe op cl ot ct = do
|
||||
delimitedMaybe openMark closeMark openToken closeToken = do
|
||||
openPos <- getPosition
|
||||
void op -- opening
|
||||
void openMark
|
||||
tokenPos <- getPosition
|
||||
content <- any'
|
||||
content <- anyUntil closeMark
|
||||
closePos <- getPosition
|
||||
void cl
|
||||
let openTok :: LocatedToken = (openPos, ot)
|
||||
void closeMark
|
||||
|
||||
let openTok :: LocatedToken = (openPos, openToken)
|
||||
res :: LocatedToken = (tokenPos, Token content)
|
||||
closeToks :: [LocatedToken] = case ct of
|
||||
closeToks :: [LocatedToken] = case closeToken of
|
||||
Just close -> [(closePos, close)]
|
||||
Nothing -> []
|
||||
|
||||
pure $ [openTok, res] <> closeToks
|
||||
where
|
||||
any' = Text.pack <$> manyTill anyChar (lookAhead cl)
|
||||
|
||||
anyUntil :: Parser a -> Parser Text
|
||||
anyUntil p = Text.pack <$> manyTill anyChar (lookAhead p)
|
||||
|
||||
delimited :: Parser a -> Parser a -> Token -> Token -> Parser [LocatedToken]
|
||||
delimited a b c d = delimitedMaybe a b c (Just d)
|
||||
|
|
@ -120,6 +139,51 @@ delimitedSymmetric s t1 t2 = delimited s s t1 t2
|
|||
eol :: Parser ()
|
||||
eol = void "\n" <|> Parsec.eof
|
||||
|
||||
header1 :: Lexer
|
||||
header1 = delimitedMaybe (void $ "= ") eol (Header One) Nothing
|
||||
|
||||
header2 :: Lexer
|
||||
header2 = delimitedMaybe (void $ "== ") eol (Header Two) Nothing
|
||||
|
||||
header3 :: Lexer
|
||||
header3 = delimitedMaybe (void $ "=== ") eol (Header Three) Nothing
|
||||
|
||||
header4 :: Lexer
|
||||
header4 = delimitedMaybe (void $ "==== ") eol (Header Four) Nothing
|
||||
|
||||
header5 :: Lexer
|
||||
header5 = delimitedMaybe (void $ "===== ") eol (Header Five) Nothing
|
||||
|
||||
header6 :: Lexer
|
||||
header6 = delimitedMaybe (void $ "====== ") eol (Header Six) Nothing
|
||||
|
||||
labeledLink :: Lexer
|
||||
labeledLink = do
|
||||
pos <- getPosition
|
||||
void $ string "<"
|
||||
pos1 <- getPosition
|
||||
void $ string "["
|
||||
pos2 <- getPosition
|
||||
text <- anyUntil $ Text.pack <$> string "]"
|
||||
pos3 <- getPosition
|
||||
void $ "]"
|
||||
pos4 <- getPosition
|
||||
void $ "("
|
||||
pos5 <- getPosition
|
||||
link <- anyUntil $ Text.pack <$> string ")"
|
||||
pos6 <- getPosition
|
||||
void $ ")"
|
||||
|
||||
pure $
|
||||
[ (pos, LabeledLinkOpen)
|
||||
, (pos1, BracketOpen)
|
||||
, (pos2, Token text)
|
||||
, (pos3, BracketClose)
|
||||
, (pos4, ParenOpen)
|
||||
, (pos5, Token link)
|
||||
, (pos6, ParenClose)
|
||||
]
|
||||
|
||||
mathsBracket :: Lexer
|
||||
mathsBracket = delimited (void $ "\\[") (void "\\]") MathsBracketOpen MathsBracketClose
|
||||
|
||||
|
|
@ -144,20 +208,13 @@ bold = delimitedSymmetric "__" BoldOpen BoldClose
|
|||
monospace :: Lexer
|
||||
monospace = delimitedSymmetric "@" MonospaceOpen MonospaceClose
|
||||
|
||||
-- parens :: Parser [LocatedToken]
|
||||
-- parens = delimited "(" ")" ParenOpen ParenClose
|
||||
|
||||
-- brackets :: Lexer
|
||||
-- brackets = delimited "[" "]" ParenOpen ParenClose
|
||||
|
||||
angles :: Parser [LocatedToken]
|
||||
angles = delimited "<" ">" AngleOpen AngleClose
|
||||
|
||||
other :: Lexer
|
||||
other = do
|
||||
pos <- getPosition
|
||||
c <- takeWhile1_ isPrint
|
||||
c <- takeWhile1_ isAlphaNum
|
||||
pure . pure $ (pos, Token c)
|
||||
where
|
||||
isAlphaNum c = isPrint c && not (isControl c) && not (isSpace c)
|
||||
|
||||
spaceToken :: Lexer
|
||||
spaceToken = do
|
||||
|
|
@ -171,12 +228,6 @@ newlineToken = do
|
|||
_ <- newline
|
||||
pure . pure $ (pos, Newline)
|
||||
|
||||
identifier :: Lexer
|
||||
identifier = do
|
||||
pos <- getPosition
|
||||
txt <- takeWhile1_ isAlphaNum
|
||||
pure . pure $ (pos, Token txt)
|
||||
|
||||
-------
|
||||
-- Helpers
|
||||
-------
|
||||
|
|
|
|||
18
test/Spec.hs
18
test/Spec.hs
|
|
@ -23,6 +23,7 @@ main = hspec $ do
|
|||
it "bare string" someString
|
||||
it "emphasis" emphatic
|
||||
it "monospace" monospace
|
||||
it "labeled link" labeledLink
|
||||
it "bird tracks" birdTracks
|
||||
it "quotes" quotes
|
||||
it "ignores nesting" ignoreNesting
|
||||
|
|
@ -37,6 +38,20 @@ main = hspec $ do
|
|||
-- Tests
|
||||
------------
|
||||
|
||||
labeledLink :: Expectation
|
||||
labeledLink =
|
||||
"<[link here](http://to.here) label>"
|
||||
`shouldLexTo` [ (1, 1, LabeledLinkOpen)
|
||||
, (1, 2, BracketOpen)
|
||||
, (1, 3, Token "link here")
|
||||
, (1, 12, BracketClose)
|
||||
, (1, 13, ParenOpen)
|
||||
, (1, 14, Token "http://to.here")
|
||||
, (1, 28, ParenClose)
|
||||
, (1, 29, Space)
|
||||
, (1, 30, Token "label>")
|
||||
]
|
||||
|
||||
maths :: IO ()
|
||||
maths = do
|
||||
"\\[some math\\]"
|
||||
|
|
@ -60,8 +75,7 @@ escaping =
|
|||
unicode :: Expectation
|
||||
unicode =
|
||||
"ドラゴンクエストの冒険者🐉"
|
||||
`shouldLexTo` [ (1, 1, Token "ドラゴンクエストの冒険者")
|
||||
, (1, 13, Token "🐉")
|
||||
`shouldLexTo` [ (1, 1, Token "ドラゴンクエストの冒険者🐉")
|
||||
]
|
||||
|
||||
ignoreNesting :: Expectation
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue