From 7d4cab48579c63eb382b987ea3e96f6a07490f37 Mon Sep 17 00:00:00 2001 From: Igor Ranieri Date: Sun, 21 Sep 2025 13:30:01 +0200 Subject: [PATCH] Adjusted others, labeled link --- src/Lexer.hs | 129 +++++++++++++++++++++++++++++++++++---------------- test/Spec.hs | 18 ++++++- 2 files changed, 106 insertions(+), 41 deletions(-) diff --git a/src/Lexer.hs b/src/Lexer.hs index 1f921bd..438ac0e 100644 --- a/src/Lexer.hs +++ b/src/Lexer.hs @@ -7,12 +7,11 @@ module Lexer ( ) where import Control.Monad (mfilter, void) -import Data.Char (isAlphaNum, isPrint) import Data.Functor (($>)) import Data.Text (Text) import Data.Text qualified as Text -import Debug.Trace (traceM) import GHC.Stack (HasCallStack) +import GHC.Unicode (isControl, isPrint, isSpace) import ParserMonad (Parser, initialParserState) import Text.Parsec import Text.Parsec qualified as Parsec @@ -22,20 +21,34 @@ type LocatedToken = (SourcePos, Token) type Lexer = Parser [LocatedToken] +data Level + = One + | Two + | Three + | Four + | Five + | Six + deriving (Eq, Show) + data Token = Token Text | Anchor - | AngleOpen - | AngleClose | BirdTrack | BoldOpen | BoldClose + | Escape | EmphasisOpen | EmphasisClose + | Header Level | MonospaceOpen | MonospaceClose | Newline - | Escape + | LabeledLinkOpen + | LabeledLinkClose + | ParenOpen + | ParenClose + | BracketOpen + | BracketClose | MathsParenOpen | MathsParenClose | MathsBracketOpen @@ -62,25 +75,18 @@ lexText = go <$> [ mathsBracket , mathsParens , escape -- maths go before escape to avoid mismatch + , headers , newlineToken , spaceToken + , labeledLink , quotes , textElement - , identifier , birdTrack , other ] rest <- go pure (toks <> rest) -match :: Parser a -> Parser (Text, a) -match p = do - input <- getInput - result <- p - input' <- getInput - let !consumed = Text.take (Text.length input - Text.length input') input - pure (consumed, result) - -- Tokens textElement :: Parser [LocatedToken] @@ -90,26 +96,39 @@ textElement = <$> [ emphasis , bold , monospace - , angles + ] + +headers :: Parser [LocatedToken] +headers = + choice $ + Parsec.try + <$> [ header1 + , header2 + , header3 + , header4 + , header5 + , header6 ] delimitedMaybe :: Parser a -> Parser a -> Token -> Maybe Token -> Parser [LocatedToken] -delimitedMaybe op cl ot ct = do +delimitedMaybe openMark closeMark openToken closeToken = do openPos <- getPosition - void op -- opening + void openMark tokenPos <- getPosition - content <- any' + content <- anyUntil closeMark closePos <- getPosition - void cl - let openTok :: LocatedToken = (openPos, ot) + void closeMark + + let openTok :: LocatedToken = (openPos, openToken) res :: LocatedToken = (tokenPos, Token content) - closeToks :: [LocatedToken] = case ct of + closeToks :: [LocatedToken] = case closeToken of Just close -> [(closePos, close)] Nothing -> [] pure $ [openTok, res] <> closeToks - where - any' = Text.pack <$> manyTill anyChar (lookAhead cl) + +anyUntil :: Parser a -> Parser Text +anyUntil p = Text.pack <$> manyTill anyChar (lookAhead p) delimited :: Parser a -> Parser a -> Token -> Token -> Parser [LocatedToken] delimited a b c d = delimitedMaybe a b c (Just d) @@ -120,6 +139,51 @@ delimitedSymmetric s t1 t2 = delimited s s t1 t2 eol :: Parser () eol = void "\n" <|> Parsec.eof +header1 :: Lexer +header1 = delimitedMaybe (void $ "= ") eol (Header One) Nothing + +header2 :: Lexer +header2 = delimitedMaybe (void $ "== ") eol (Header Two) Nothing + +header3 :: Lexer +header3 = delimitedMaybe (void $ "=== ") eol (Header Three) Nothing + +header4 :: Lexer +header4 = delimitedMaybe (void $ "==== ") eol (Header Four) Nothing + +header5 :: Lexer +header5 = delimitedMaybe (void $ "===== ") eol (Header Five) Nothing + +header6 :: Lexer +header6 = delimitedMaybe (void $ "====== ") eol (Header Six) Nothing + +labeledLink :: Lexer +labeledLink = do + pos <- getPosition + void $ string "<" + pos1 <- getPosition + void $ string "[" + pos2 <- getPosition + text <- anyUntil $ Text.pack <$> string "]" + pos3 <- getPosition + void $ "]" + pos4 <- getPosition + void $ "(" + pos5 <- getPosition + link <- anyUntil $ Text.pack <$> string ")" + pos6 <- getPosition + void $ ")" + + pure $ + [ (pos, LabeledLinkOpen) + , (pos1, BracketOpen) + , (pos2, Token text) + , (pos3, BracketClose) + , (pos4, ParenOpen) + , (pos5, Token link) + , (pos6, ParenClose) + ] + mathsBracket :: Lexer mathsBracket = delimited (void $ "\\[") (void "\\]") MathsBracketOpen MathsBracketClose @@ -144,20 +208,13 @@ bold = delimitedSymmetric "__" BoldOpen BoldClose monospace :: Lexer monospace = delimitedSymmetric "@" MonospaceOpen MonospaceClose --- parens :: Parser [LocatedToken] --- parens = delimited "(" ")" ParenOpen ParenClose - --- brackets :: Lexer --- brackets = delimited "[" "]" ParenOpen ParenClose - -angles :: Parser [LocatedToken] -angles = delimited "<" ">" AngleOpen AngleClose - other :: Lexer other = do pos <- getPosition - c <- takeWhile1_ isPrint + c <- takeWhile1_ isAlphaNum pure . pure $ (pos, Token c) + where + isAlphaNum c = isPrint c && not (isControl c) && not (isSpace c) spaceToken :: Lexer spaceToken = do @@ -171,12 +228,6 @@ newlineToken = do _ <- newline pure . pure $ (pos, Newline) -identifier :: Lexer -identifier = do - pos <- getPosition - txt <- takeWhile1_ isAlphaNum - pure . pure $ (pos, Token txt) - ------- -- Helpers ------- diff --git a/test/Spec.hs b/test/Spec.hs index 2475a51..ce09fe0 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -23,6 +23,7 @@ main = hspec $ do it "bare string" someString it "emphasis" emphatic it "monospace" monospace + it "labeled link" labeledLink it "bird tracks" birdTracks it "quotes" quotes it "ignores nesting" ignoreNesting @@ -37,6 +38,20 @@ main = hspec $ do -- Tests ------------ +labeledLink :: Expectation +labeledLink = + "<[link here](http://to.here) label>" + `shouldLexTo` [ (1, 1, LabeledLinkOpen) + , (1, 2, BracketOpen) + , (1, 3, Token "link here") + , (1, 12, BracketClose) + , (1, 13, ParenOpen) + , (1, 14, Token "http://to.here") + , (1, 28, ParenClose) + , (1, 29, Space) + , (1, 30, Token "label>") + ] + maths :: IO () maths = do "\\[some math\\]" @@ -60,8 +75,7 @@ escaping = unicode :: Expectation unicode = "ドラゴンクエストの冒険者🐉" - `shouldLexTo` [ (1, 1, Token "ドラゴンクエストの冒険者") - , (1, 13, Token "🐉") + `shouldLexTo` [ (1, 1, Token "ドラゴンクエストの冒険者🐉") ] ignoreNesting :: Expectation