Init
This commit is contained in:
commit
c9f61c4e06
16 changed files with 1366 additions and 0 deletions
190
src/Lexer.hs
Normal file
190
src/Lexer.hs
Normal file
|
|
@ -0,0 +1,190 @@
|
|||
{-# LANGUAGE OverloadedStrings #-}
|
||||
|
||||
module Lexer (
|
||||
Token (..),
|
||||
lexer,
|
||||
emphasis,
|
||||
) where
|
||||
|
||||
import Control.Monad (mfilter)
|
||||
import Data.Char (isAlphaNum, isPrint)
|
||||
import Data.Functor (($>))
|
||||
import Data.Text (Text)
|
||||
import Data.Text qualified as Text
|
||||
import GHC.Stack (HasCallStack)
|
||||
import ParserMonad (Parser, initialParserState)
|
||||
import Text.Parsec
|
||||
import Text.Parsec qualified as Parsec
|
||||
import Text.Parsec.Pos (updatePosChar)
|
||||
|
||||
type LocatedToken = (SourcePos, Token)
|
||||
|
||||
type Lexer = Parser [LocatedToken]
|
||||
|
||||
data Token
|
||||
= Token Text
|
||||
| Anchor
|
||||
| AngleOpen
|
||||
| AngleClose
|
||||
| BoldOpen
|
||||
| BoldClose
|
||||
| BracketOpen
|
||||
| BracketClose
|
||||
| EmphasisOpen
|
||||
| EmphasisClose
|
||||
| MonospaceOpen
|
||||
| MonospaceClose
|
||||
| Newline
|
||||
| ParenOpen
|
||||
| ParenClose
|
||||
| QuoteOpen
|
||||
| QuoteClose
|
||||
| Space
|
||||
| EOF
|
||||
deriving (Eq, Show)
|
||||
|
||||
lexer :: String -> Either ParseError [LocatedToken]
|
||||
lexer = Parsec.runParser lexText initialParserState "input" . Text.pack
|
||||
|
||||
lexText :: (HasCallStack) => Parser [LocatedToken]
|
||||
lexText = go
|
||||
where
|
||||
go = do
|
||||
Parsec.optionMaybe Parsec.eof >>= \case
|
||||
Just _ -> pure []
|
||||
Nothing -> do
|
||||
toks <-
|
||||
choice
|
||||
[ newlineToken
|
||||
, spaceToken
|
||||
, textElement
|
||||
, identifier
|
||||
, other
|
||||
]
|
||||
rest <- go
|
||||
pure (toks <> rest)
|
||||
|
||||
match :: Parser a -> Parser (Text, a)
|
||||
match p = do
|
||||
input <- getInput
|
||||
result <- p
|
||||
input' <- getInput
|
||||
let !consumed = Text.take (Text.length input - Text.length input') input
|
||||
pure (consumed, result)
|
||||
|
||||
-- Tokens
|
||||
|
||||
textElement :: Parser [LocatedToken]
|
||||
textElement =
|
||||
choice $
|
||||
Parsec.try
|
||||
<$> [ emphasis
|
||||
, bold
|
||||
, monospace
|
||||
, parens
|
||||
, brackets
|
||||
, angles
|
||||
]
|
||||
|
||||
delimited :: String -> String -> Token -> Token -> Parser [LocatedToken]
|
||||
delimited c1 c2 ot ct = do
|
||||
pos <- getPosition
|
||||
(_, content) <- match $ between op cl any'
|
||||
|
||||
innerToks <- case lexer $ Text.unpack content of
|
||||
Left _ -> do
|
||||
pos' <- getPosition
|
||||
pure $ [(pos', Token content)]
|
||||
Right toks -> pure toks
|
||||
|
||||
let openTok :: LocatedToken = (pos, ot)
|
||||
closeTok :: LocatedToken = (pos, ct)
|
||||
|
||||
pure $ openTok : innerToks <> [closeTok]
|
||||
where
|
||||
op = string c1
|
||||
cl = string c2
|
||||
any' = Text.pack <$> manyTill anyChar (lookAhead cl)
|
||||
|
||||
delimited' :: String -> Token -> Token -> Parser [LocatedToken]
|
||||
delimited' s t1 t2 = delimited s s t1 t2
|
||||
|
||||
emphasis :: Lexer
|
||||
emphasis = delimited' "/" EmphasisOpen EmphasisClose
|
||||
|
||||
bold :: Lexer
|
||||
bold = delimited' "__" BoldOpen BoldClose
|
||||
|
||||
monospace :: Lexer
|
||||
monospace = delimited' "@" MonospaceOpen MonospaceClose
|
||||
|
||||
parens :: Parser [LocatedToken]
|
||||
parens = delimited "(" ")" ParenOpen ParenClose
|
||||
|
||||
brackets :: Lexer
|
||||
brackets = delimited "[" "]" ParenOpen ParenClose
|
||||
|
||||
angles :: Parser [LocatedToken]
|
||||
angles = delimited "<" ">" AngleOpen AngleClose
|
||||
|
||||
other :: Lexer
|
||||
other = do
|
||||
pos <- getPosition
|
||||
c <- takeWhile1_ isPrint
|
||||
pure . pure $ (pos, Token c)
|
||||
|
||||
spaceToken :: Lexer
|
||||
spaceToken = do
|
||||
pos <- getPosition
|
||||
_ <- many1 (char ' ')
|
||||
pure . pure $ (pos, Space)
|
||||
|
||||
newlineToken :: Lexer
|
||||
newlineToken = do
|
||||
pos <- getPosition
|
||||
_ <- newline
|
||||
pure . pure $ (pos, Newline)
|
||||
|
||||
identifier :: Lexer
|
||||
identifier = do
|
||||
pos <- getPosition
|
||||
txt <- takeWhile1_ isAlphaNum
|
||||
pure . pure $ (pos, Token txt)
|
||||
|
||||
-------
|
||||
-- Helpers
|
||||
-------
|
||||
|
||||
-- | Like `takeWhile`, but unconditionally take escaped characters.
|
||||
takeWhile_ :: (Char -> Bool) -> Parser Text
|
||||
takeWhile_ p = scan p_ False
|
||||
where
|
||||
p_ escaped c
|
||||
| escaped = Just False
|
||||
| not $ p c = Nothing
|
||||
| otherwise = Just (c == '\\')
|
||||
|
||||
-- | Like 'takeWhile1', but unconditionally take escaped characters.
|
||||
takeWhile1_ :: (Char -> Bool) -> Parser Text
|
||||
takeWhile1_ = mfilter (not . Text.null) . takeWhile_
|
||||
|
||||
{- | Scan the input text, accumulating characters as long as the scanning
|
||||
function returns true.
|
||||
-}
|
||||
scan ::
|
||||
-- | scan function
|
||||
(state -> Char -> Maybe state) ->
|
||||
-- | initial state
|
||||
state ->
|
||||
Parser Text
|
||||
scan f initState = do
|
||||
parserState@State{stateInput = input, statePos = pos} <- Parsec.getParserState
|
||||
(remaining, finalPos, ct) <- go input initState pos 0
|
||||
let newState = parserState{stateInput = remaining, statePos = finalPos}
|
||||
Parsec.setParserState newState $> Text.take ct input
|
||||
where
|
||||
go !input' !st !posAccum !count' = case Text.uncons input' of
|
||||
Nothing -> pure (input', posAccum, count')
|
||||
Just (char', input'') -> case f st char' of
|
||||
Nothing -> pure (input', posAccum, count')
|
||||
Just st' -> go input'' st' (updatePosChar posAccum char') (count' + 1)
|
||||
Loading…
Add table
Add a link
Reference in a new issue