forked from elland/haddock2
Adjusted others, labeled link
This commit is contained in:
parent
4ef8d2c28c
commit
7d4cab4857
2 changed files with 106 additions and 41 deletions
129
src/Lexer.hs
129
src/Lexer.hs
|
|
@ -7,12 +7,11 @@ module Lexer (
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import Control.Monad (mfilter, void)
|
import Control.Monad (mfilter, void)
|
||||||
import Data.Char (isAlphaNum, isPrint)
|
|
||||||
import Data.Functor (($>))
|
import Data.Functor (($>))
|
||||||
import Data.Text (Text)
|
import Data.Text (Text)
|
||||||
import Data.Text qualified as Text
|
import Data.Text qualified as Text
|
||||||
import Debug.Trace (traceM)
|
|
||||||
import GHC.Stack (HasCallStack)
|
import GHC.Stack (HasCallStack)
|
||||||
|
import GHC.Unicode (isControl, isPrint, isSpace)
|
||||||
import ParserMonad (Parser, initialParserState)
|
import ParserMonad (Parser, initialParserState)
|
||||||
import Text.Parsec
|
import Text.Parsec
|
||||||
import Text.Parsec qualified as Parsec
|
import Text.Parsec qualified as Parsec
|
||||||
|
|
@ -22,20 +21,34 @@ type LocatedToken = (SourcePos, Token)
|
||||||
|
|
||||||
type Lexer = Parser [LocatedToken]
|
type Lexer = Parser [LocatedToken]
|
||||||
|
|
||||||
|
data Level
|
||||||
|
= One
|
||||||
|
| Two
|
||||||
|
| Three
|
||||||
|
| Four
|
||||||
|
| Five
|
||||||
|
| Six
|
||||||
|
deriving (Eq, Show)
|
||||||
|
|
||||||
data Token
|
data Token
|
||||||
= Token Text
|
= Token Text
|
||||||
| Anchor
|
| Anchor
|
||||||
| AngleOpen
|
|
||||||
| AngleClose
|
|
||||||
| BirdTrack
|
| BirdTrack
|
||||||
| BoldOpen
|
| BoldOpen
|
||||||
| BoldClose
|
| BoldClose
|
||||||
|
| Escape
|
||||||
| EmphasisOpen
|
| EmphasisOpen
|
||||||
| EmphasisClose
|
| EmphasisClose
|
||||||
|
| Header Level
|
||||||
| MonospaceOpen
|
| MonospaceOpen
|
||||||
| MonospaceClose
|
| MonospaceClose
|
||||||
| Newline
|
| Newline
|
||||||
| Escape
|
| LabeledLinkOpen
|
||||||
|
| LabeledLinkClose
|
||||||
|
| ParenOpen
|
||||||
|
| ParenClose
|
||||||
|
| BracketOpen
|
||||||
|
| BracketClose
|
||||||
| MathsParenOpen
|
| MathsParenOpen
|
||||||
| MathsParenClose
|
| MathsParenClose
|
||||||
| MathsBracketOpen
|
| MathsBracketOpen
|
||||||
|
|
@ -62,25 +75,18 @@ lexText = go
|
||||||
<$> [ mathsBracket
|
<$> [ mathsBracket
|
||||||
, mathsParens
|
, mathsParens
|
||||||
, escape -- maths go before escape to avoid mismatch
|
, escape -- maths go before escape to avoid mismatch
|
||||||
|
, headers
|
||||||
, newlineToken
|
, newlineToken
|
||||||
, spaceToken
|
, spaceToken
|
||||||
|
, labeledLink
|
||||||
, quotes
|
, quotes
|
||||||
, textElement
|
, textElement
|
||||||
, identifier
|
|
||||||
, birdTrack
|
, birdTrack
|
||||||
, other
|
, other
|
||||||
]
|
]
|
||||||
rest <- go
|
rest <- go
|
||||||
pure (toks <> rest)
|
pure (toks <> rest)
|
||||||
|
|
||||||
match :: Parser a -> Parser (Text, a)
|
|
||||||
match p = do
|
|
||||||
input <- getInput
|
|
||||||
result <- p
|
|
||||||
input' <- getInput
|
|
||||||
let !consumed = Text.take (Text.length input - Text.length input') input
|
|
||||||
pure (consumed, result)
|
|
||||||
|
|
||||||
-- Tokens
|
-- Tokens
|
||||||
|
|
||||||
textElement :: Parser [LocatedToken]
|
textElement :: Parser [LocatedToken]
|
||||||
|
|
@ -90,26 +96,39 @@ textElement =
|
||||||
<$> [ emphasis
|
<$> [ emphasis
|
||||||
, bold
|
, bold
|
||||||
, monospace
|
, monospace
|
||||||
, angles
|
]
|
||||||
|
|
||||||
|
headers :: Parser [LocatedToken]
|
||||||
|
headers =
|
||||||
|
choice $
|
||||||
|
Parsec.try
|
||||||
|
<$> [ header1
|
||||||
|
, header2
|
||||||
|
, header3
|
||||||
|
, header4
|
||||||
|
, header5
|
||||||
|
, header6
|
||||||
]
|
]
|
||||||
|
|
||||||
delimitedMaybe :: Parser a -> Parser a -> Token -> Maybe Token -> Parser [LocatedToken]
|
delimitedMaybe :: Parser a -> Parser a -> Token -> Maybe Token -> Parser [LocatedToken]
|
||||||
delimitedMaybe op cl ot ct = do
|
delimitedMaybe openMark closeMark openToken closeToken = do
|
||||||
openPos <- getPosition
|
openPos <- getPosition
|
||||||
void op -- opening
|
void openMark
|
||||||
tokenPos <- getPosition
|
tokenPos <- getPosition
|
||||||
content <- any'
|
content <- anyUntil closeMark
|
||||||
closePos <- getPosition
|
closePos <- getPosition
|
||||||
void cl
|
void closeMark
|
||||||
let openTok :: LocatedToken = (openPos, ot)
|
|
||||||
|
let openTok :: LocatedToken = (openPos, openToken)
|
||||||
res :: LocatedToken = (tokenPos, Token content)
|
res :: LocatedToken = (tokenPos, Token content)
|
||||||
closeToks :: [LocatedToken] = case ct of
|
closeToks :: [LocatedToken] = case closeToken of
|
||||||
Just close -> [(closePos, close)]
|
Just close -> [(closePos, close)]
|
||||||
Nothing -> []
|
Nothing -> []
|
||||||
|
|
||||||
pure $ [openTok, res] <> closeToks
|
pure $ [openTok, res] <> closeToks
|
||||||
where
|
|
||||||
any' = Text.pack <$> manyTill anyChar (lookAhead cl)
|
anyUntil :: Parser a -> Parser Text
|
||||||
|
anyUntil p = Text.pack <$> manyTill anyChar (lookAhead p)
|
||||||
|
|
||||||
delimited :: Parser a -> Parser a -> Token -> Token -> Parser [LocatedToken]
|
delimited :: Parser a -> Parser a -> Token -> Token -> Parser [LocatedToken]
|
||||||
delimited a b c d = delimitedMaybe a b c (Just d)
|
delimited a b c d = delimitedMaybe a b c (Just d)
|
||||||
|
|
@ -120,6 +139,51 @@ delimitedSymmetric s t1 t2 = delimited s s t1 t2
|
||||||
eol :: Parser ()
|
eol :: Parser ()
|
||||||
eol = void "\n" <|> Parsec.eof
|
eol = void "\n" <|> Parsec.eof
|
||||||
|
|
||||||
|
header1 :: Lexer
|
||||||
|
header1 = delimitedMaybe (void $ "= ") eol (Header One) Nothing
|
||||||
|
|
||||||
|
header2 :: Lexer
|
||||||
|
header2 = delimitedMaybe (void $ "== ") eol (Header Two) Nothing
|
||||||
|
|
||||||
|
header3 :: Lexer
|
||||||
|
header3 = delimitedMaybe (void $ "=== ") eol (Header Three) Nothing
|
||||||
|
|
||||||
|
header4 :: Lexer
|
||||||
|
header4 = delimitedMaybe (void $ "==== ") eol (Header Four) Nothing
|
||||||
|
|
||||||
|
header5 :: Lexer
|
||||||
|
header5 = delimitedMaybe (void $ "===== ") eol (Header Five) Nothing
|
||||||
|
|
||||||
|
header6 :: Lexer
|
||||||
|
header6 = delimitedMaybe (void $ "====== ") eol (Header Six) Nothing
|
||||||
|
|
||||||
|
labeledLink :: Lexer
|
||||||
|
labeledLink = do
|
||||||
|
pos <- getPosition
|
||||||
|
void $ string "<"
|
||||||
|
pos1 <- getPosition
|
||||||
|
void $ string "["
|
||||||
|
pos2 <- getPosition
|
||||||
|
text <- anyUntil $ Text.pack <$> string "]"
|
||||||
|
pos3 <- getPosition
|
||||||
|
void $ "]"
|
||||||
|
pos4 <- getPosition
|
||||||
|
void $ "("
|
||||||
|
pos5 <- getPosition
|
||||||
|
link <- anyUntil $ Text.pack <$> string ")"
|
||||||
|
pos6 <- getPosition
|
||||||
|
void $ ")"
|
||||||
|
|
||||||
|
pure $
|
||||||
|
[ (pos, LabeledLinkOpen)
|
||||||
|
, (pos1, BracketOpen)
|
||||||
|
, (pos2, Token text)
|
||||||
|
, (pos3, BracketClose)
|
||||||
|
, (pos4, ParenOpen)
|
||||||
|
, (pos5, Token link)
|
||||||
|
, (pos6, ParenClose)
|
||||||
|
]
|
||||||
|
|
||||||
mathsBracket :: Lexer
|
mathsBracket :: Lexer
|
||||||
mathsBracket = delimited (void $ "\\[") (void "\\]") MathsBracketOpen MathsBracketClose
|
mathsBracket = delimited (void $ "\\[") (void "\\]") MathsBracketOpen MathsBracketClose
|
||||||
|
|
||||||
|
|
@ -144,20 +208,13 @@ bold = delimitedSymmetric "__" BoldOpen BoldClose
|
||||||
monospace :: Lexer
|
monospace :: Lexer
|
||||||
monospace = delimitedSymmetric "@" MonospaceOpen MonospaceClose
|
monospace = delimitedSymmetric "@" MonospaceOpen MonospaceClose
|
||||||
|
|
||||||
-- parens :: Parser [LocatedToken]
|
|
||||||
-- parens = delimited "(" ")" ParenOpen ParenClose
|
|
||||||
|
|
||||||
-- brackets :: Lexer
|
|
||||||
-- brackets = delimited "[" "]" ParenOpen ParenClose
|
|
||||||
|
|
||||||
angles :: Parser [LocatedToken]
|
|
||||||
angles = delimited "<" ">" AngleOpen AngleClose
|
|
||||||
|
|
||||||
other :: Lexer
|
other :: Lexer
|
||||||
other = do
|
other = do
|
||||||
pos <- getPosition
|
pos <- getPosition
|
||||||
c <- takeWhile1_ isPrint
|
c <- takeWhile1_ isAlphaNum
|
||||||
pure . pure $ (pos, Token c)
|
pure . pure $ (pos, Token c)
|
||||||
|
where
|
||||||
|
isAlphaNum c = isPrint c && not (isControl c) && not (isSpace c)
|
||||||
|
|
||||||
spaceToken :: Lexer
|
spaceToken :: Lexer
|
||||||
spaceToken = do
|
spaceToken = do
|
||||||
|
|
@ -171,12 +228,6 @@ newlineToken = do
|
||||||
_ <- newline
|
_ <- newline
|
||||||
pure . pure $ (pos, Newline)
|
pure . pure $ (pos, Newline)
|
||||||
|
|
||||||
identifier :: Lexer
|
|
||||||
identifier = do
|
|
||||||
pos <- getPosition
|
|
||||||
txt <- takeWhile1_ isAlphaNum
|
|
||||||
pure . pure $ (pos, Token txt)
|
|
||||||
|
|
||||||
-------
|
-------
|
||||||
-- Helpers
|
-- Helpers
|
||||||
-------
|
-------
|
||||||
|
|
|
||||||
18
test/Spec.hs
18
test/Spec.hs
|
|
@ -23,6 +23,7 @@ main = hspec $ do
|
||||||
it "bare string" someString
|
it "bare string" someString
|
||||||
it "emphasis" emphatic
|
it "emphasis" emphatic
|
||||||
it "monospace" monospace
|
it "monospace" monospace
|
||||||
|
it "labeled link" labeledLink
|
||||||
it "bird tracks" birdTracks
|
it "bird tracks" birdTracks
|
||||||
it "quotes" quotes
|
it "quotes" quotes
|
||||||
it "ignores nesting" ignoreNesting
|
it "ignores nesting" ignoreNesting
|
||||||
|
|
@ -37,6 +38,20 @@ main = hspec $ do
|
||||||
-- Tests
|
-- Tests
|
||||||
------------
|
------------
|
||||||
|
|
||||||
|
labeledLink :: Expectation
|
||||||
|
labeledLink =
|
||||||
|
"<[link here](http://to.here) label>"
|
||||||
|
`shouldLexTo` [ (1, 1, LabeledLinkOpen)
|
||||||
|
, (1, 2, BracketOpen)
|
||||||
|
, (1, 3, Token "link here")
|
||||||
|
, (1, 12, BracketClose)
|
||||||
|
, (1, 13, ParenOpen)
|
||||||
|
, (1, 14, Token "http://to.here")
|
||||||
|
, (1, 28, ParenClose)
|
||||||
|
, (1, 29, Space)
|
||||||
|
, (1, 30, Token "label>")
|
||||||
|
]
|
||||||
|
|
||||||
maths :: IO ()
|
maths :: IO ()
|
||||||
maths = do
|
maths = do
|
||||||
"\\[some math\\]"
|
"\\[some math\\]"
|
||||||
|
|
@ -60,8 +75,7 @@ escaping =
|
||||||
unicode :: Expectation
|
unicode :: Expectation
|
||||||
unicode =
|
unicode =
|
||||||
"ドラゴンクエストの冒険者🐉"
|
"ドラゴンクエストの冒険者🐉"
|
||||||
`shouldLexTo` [ (1, 1, Token "ドラゴンクエストの冒険者")
|
`shouldLexTo` [ (1, 1, Token "ドラゴンクエストの冒険者🐉")
|
||||||
, (1, 13, Token "🐉")
|
|
||||||
]
|
]
|
||||||
|
|
||||||
ignoreNesting :: Expectation
|
ignoreNesting :: Expectation
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue