From 1b59bb9c25883de77e7d7e36752c82186503fc0e Mon Sep 17 00:00:00 2001 From: Igor Ranieri Date: Fri, 26 Sep 2025 22:32:19 +0200 Subject: [PATCH] Formatting --- src/Lexer.hs | 310 ++++++++++++++++++++++++++------------------------- test/Spec.hs | 238 +++++++++++++++++++-------------------- 2 files changed, 275 insertions(+), 273 deletions(-) diff --git a/src/Lexer.hs b/src/Lexer.hs index 426a7ff..4a85fb5 100644 --- a/src/Lexer.hs +++ b/src/Lexer.hs @@ -1,10 +1,11 @@ {-# LANGUAGE OverloadedStrings #-} module Lexer ( - Token (..), - lexer, - emphasis, -) where + Token (..), + lexer, + emphasis, +) +where import Control.Monad (mfilter, void) import Data.Functor (($>)) @@ -17,51 +18,52 @@ import Text.Parsec qualified as Parsec import Text.Parsec.Pos (updatePosChar) type Located a = (SourcePos, a) + type LocatedToken = (SourcePos, Token) type Lexer = Parser [LocatedToken] data Level - = One - | Two - | Three - | Four - | Five - | Six - deriving (Eq, Show) + = One + | Two + | Three + | Four + | Five + | Six + deriving (Eq, Show) data Token - = Token Text - | Anchor Text - | BirdTrack - | BoldOpen - | BoldClose - | Escape - | EmphasisOpen - | EmphasisClose - | Header Level - | MonospaceOpen - | MonospaceClose - | Newline - | LinkOpen - | LinkClose - | LabeledLinkOpen - | LabeledLinkClose - | ParenOpen - | ParenClose - | BracketOpen - | BracketClose - | MathInlineOpen - | MathInlineClose - | MathMultilineOpen - | MathMultilineClose - | NumericEntity Int - | Module Text - | QuoteOpen - | QuoteClose - | Space - | EOF - deriving (Eq, Show) + = Token Text + | Anchor Text + | BirdTrack + | BoldOpen + | BoldClose + | Escape + | EmphasisOpen + | EmphasisClose + | Header Level + | MonospaceOpen + | MonospaceClose + | Newline + | LinkOpen + | LinkClose + | LabeledLinkOpen + | LabeledLinkClose + | ParenOpen + | ParenClose + | BracketOpen + | BracketClose + | MathInlineOpen + | MathInlineClose + | MathMultilineOpen + | MathMultilineClose + | NumericEntity Int + | Module Text + | QuoteOpen + | QuoteClose + | Space + | EOF + deriving (Eq, Show) located :: Parser a -> Parser (SourcePos, a) located p = (,) <$> getPosition <*> p @@ -74,74 +76,74 @@ lexer = Parsec.runParser lexText initialParserState "input" . Text.pack lexText :: Parser [LocatedToken] lexText = go - where - go = do - Parsec.optionMaybe Parsec.eof >>= \case - Just _ -> pure [] - Nothing -> do - toks <- - choice $ - Parsec.try - <$> [ mathMultiline - , mathInline - , escape -- maths go before escape to avoid mismatch - , headers - , newlineToken - , spaceToken - , link - , labeledLink - , module_ - , anchor - , textElement - , quotes - , birdTrack - , other - ] - rest <- go - pure (toks <> rest) + where + go = do + Parsec.optionMaybe Parsec.eof >>= \case + Just _ -> pure [] + Nothing -> do + toks <- + choice $ + Parsec.try + <$> [ mathMultiline + , mathInline + , escape -- maths go before escape to avoid mismatch + , headers + , newlineToken + , spaceToken + , link + , labeledLink + , module_ + , anchor + , textElement + , quotes + , birdTrack + , other + ] + rest <- go + pure (toks <> rest) -- Tokens textElement :: Parser [LocatedToken] textElement = - choice $ - Parsec.try - <$> [ emphasis - , bold - , monospace - ] + choice $ + Parsec.try + <$> [ emphasis + , bold + , monospace + ] headers :: Parser [LocatedToken] headers = - choice $ - Parsec.try - <$> [ header1 - , header2 - , header3 - , header4 - , header5 - , header6 - ] + choice $ + Parsec.try + <$> [ header1 + , header2 + , header3 + , header4 + , header5 + , header6 + ] anyUntil :: Parser a -> Parser Text anyUntil p = Text.pack <$> manyTill anyChar (lookAhead p) delimitedAsTuple :: Parser open -> Parser close -> Parser (Located open, LocatedToken, Located close) delimitedAsTuple openP closeP = - (,,) - <$> located openP - <*> located (Token <$> anyUntil closeP) - <*> located closeP + (,,) + <$> located openP + <*> located (Token <$> anyUntil closeP) + <*> located closeP delimited :: Parser open -> Parser close -> Token -> Token -> Parser [LocatedToken] delimited openP closeP openTok closeTok = asList <$> delimitedAsTuple (openTok <$ openP) (closeTok <$ closeP) - where - asList (a, tok, b) = [a, tok, b] + where + asList (a, tok, b) = [a, tok, b] delimitedNoTrailing :: Parser open -> Parser close -> Token -> Parser [LocatedToken] delimitedNoTrailing openP closeP openTok = asList <$> delimitedAsTuple (openTok <$ openP) (void closeP) - where - asList (a, tok, _) = [a, tok] + where + asList (a, tok, _) = [a, tok] delimitedSymmetric :: Parser a -> Token -> Token -> Parser [LocatedToken] delimitedSymmetric s t1 t2 = delimited s s t1 t2 @@ -170,9 +172,8 @@ header6 = delimitedNoTrailing "====== " eol (Header Six) -- #anchors# anchor :: Lexer anchor = do - x <- located $ between "#" "#" (Anchor <$> anyUntil "#") - pure [x] - + x <- located $ between "#" "#" (Anchor <$> anyUntil "#") + pure [x] moduleNames :: Parser Text moduleNames = intercalate "." . fmap Text.pack <$> upperId `sepBy1` char '.' @@ -188,47 +189,47 @@ identifierChar = satisfy (\c -> isAlphaNum c || c == '_') -- "Module.Name\#anchor" -- known as "old anchor". this has been deprecated for 9 years, thanks Ben module_ :: Lexer module_ = between (char '"') (char '"') inner - where - inner = do - m <- located $ Module <$> moduleNames - mAnchor <- optionMaybe (located $ anchorHash *> (Anchor <$> anchorText)) - pure $ case mAnchor of - Just anc -> [m, anc] - Nothing -> [m] + where + inner = do + m <- located $ Module <$> moduleNames + mAnchor <- optionMaybe (located $ anchorHash *> (Anchor <$> anchorText)) + pure $ case mAnchor of + Just anc -> [m, anc] + Nothing -> [m] - anchorHash :: Parser Text - anchorHash = "#" <|> try "\\#" + anchorHash :: Parser Text + anchorHash = "#" <|> try "\\#" - anchorText :: Parser Text - anchorText = Text.pack <$> many (satisfy (\c -> c /= '"' && not (isSpace c))) + anchorText :: Parser Text + anchorText = Text.pack <$> many (satisfy (\c -> c /= '"' && not (isSpace c))) linkRaw :: Lexer linkRaw = - tokenise - [ BracketOpen <$ "[" - , Token <$> anyUntil "]" - , BracketClose <$ "]" - , ParenOpen <$ "(" - , Token <$> anyUntil ")" - , ParenClose <$ ")" - ] + tokenise + [ BracketOpen <$ "[" + , Token <$> anyUntil "]" + , BracketClose <$ "]" + , ParenOpen <$ "(" + , Token <$> anyUntil ")" + , ParenClose <$ ")" + ] link :: Lexer link = do - pos <- getPosition - l <- linkRaw - -- register the position of the last token - pos' <- flip incSourceColumn (-1) <$> getPosition - pure $ (pos, LinkOpen) : l <> [(pos', LinkClose)] + pos <- getPosition + l <- linkRaw + -- register the position of the last token + pos' <- flip incSourceColumn (-1) <$> getPosition + pure $ (pos, LinkOpen) : l <> [(pos', LinkClose)] labeledLink :: Lexer labeledLink = do - open <- located $ LabeledLinkOpen <$ "<" - linkRes <- linkRaw - labelRes <- located $ Token <$> anyUntil ">" - close <- located $ LabeledLinkClose <$ ">" - pure $ - open : linkRes <> [ labelRes , close ] + open <- located $ LabeledLinkOpen <$ "<" + linkRes <- linkRaw + labelRes <- located $ Token <$> anyUntil ">" + close <- located $ LabeledLinkClose <$ ">" + pure $ + open : linkRes <> [labelRes, close] mathMultiline :: Lexer mathMultiline = delimited "\\[" "\\]" MathMultilineOpen MathMultilineClose @@ -256,23 +257,23 @@ monospace = delimitedSymmetric "@" MonospaceOpen MonospaceClose other :: Lexer other = do - pos <- getPosition - c <- takeWhile1_ isUnicodeAlphaNum - pure . pure $ (pos, Token c) - where - isUnicodeAlphaNum c = isPrint c && not (isControl c) && not (isSpace c) + pos <- getPosition + c <- takeWhile1_ isUnicodeAlphaNum + pure . pure $ (pos, Token c) + where + isUnicodeAlphaNum c = isPrint c && not (isControl c) && not (isSpace c) spaceToken :: Lexer spaceToken = do - pos <- getPosition - _ <- many1 (char ' ') - pure . pure $ (pos, Space) + pos <- getPosition + _ <- many1 (char ' ') + pure . pure $ (pos, Space) newlineToken :: Lexer newlineToken = do - pos <- getPosition - _ <- newline - pure . pure $ (pos, Newline) + pos <- getPosition + _ <- newline + pure . pure $ (pos, Newline) ------- -- Helpers @@ -281,11 +282,11 @@ newlineToken = do -- | Like `takeWhile`, but unconditionally take escaped characters. takeWhile_ :: (Char -> Bool) -> Parser Text takeWhile_ p = scan p_ False - where - p_ escaped c - | escaped = Just False - | not $ p c = Nothing - | otherwise = Just (c == '\\') + where + p_ escaped c + | escaped = Just False + | not $ p c = Nothing + | otherwise = Just (c == '\\') -- | Like 'takeWhile1', but unconditionally take escaped characters. takeWhile1_ :: (Char -> Bool) -> Parser Text @@ -295,19 +296,20 @@ takeWhile1_ = mfilter (not . Text.null) . takeWhile_ function returns true. -} scan :: - -- | scan function - (state -> Char -> Maybe state) -> - -- | initial state - state -> - Parser Text + -- | scan function + (state -> Char -> Maybe state) -> + -- | initial state + state -> + Parser Text scan f initState = do - parserState@State{stateInput = input, statePos = pos} <- Parsec.getParserState - (remaining, finalPos, ct) <- go input initState pos 0 - let newState = parserState{stateInput = remaining, statePos = finalPos} - Parsec.setParserState newState $> Text.take ct input - where - go !input' !st !posAccum !count' = case Text.uncons input' of - Nothing -> pure (input', posAccum, count') - Just (char', input'') -> case f st char' of - Nothing -> pure (input', posAccum, count') - Just st' -> go input'' st' (updatePosChar posAccum char') (count' + 1) + parserState@State{stateInput = input, statePos = pos} <- Parsec.getParserState + + (remaining, finalPos, ct) <- go input initState pos 0 + let newState = parserState{stateInput = remaining, statePos = finalPos} + Parsec.setParserState newState $> Text.take ct input + where + go !input' !st !posAccum !count' = case Text.uncons input' of + Nothing -> pure (input', posAccum, count') + Just (char', input'') -> case f st char' of + Nothing -> pure (input', posAccum, count') + Just st' -> go input'' st' (updatePosChar posAccum char') (count' + 1) diff --git a/test/Spec.hs b/test/Spec.hs index 745aefa..7258a2d 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -10,33 +10,33 @@ import Types import Data.String (IsString (..)) import Data.Text (Text) -import Text.Parsec.Pos import GHC.Stack +import Text.Parsec.Pos main :: IO () main = hspec $ do - describe "Lexer" do - describe "minimal" do - it "handles unicode" unicode - it "escapes" escaping - it "maths" math - it "anchors" anchor - it "space chars" space - it "bare string" someString - it "emphasis" emphatic - it "monospace" monospace - it "labeled link" labeledLink - it "markdown link" link - it "bird tracks" birdTracks - it "module names" modules - it "quotes" quotes - it "ignores nesting" ignoreNesting + describe "Lexer" do + describe "minimal" do + it "handles unicode" unicode + it "escapes" escaping + it "maths" math + it "anchors" anchor + it "space chars" space + it "bare string" someString + it "emphasis" emphatic + it "monospace" monospace + it "labeled link" labeledLink + it "markdown link" link + it "bird tracks" birdTracks + it "module names" modules + it "quotes" quotes + it "ignores nesting" ignoreNesting - describe "Parser" do - it "Bold" do - "__bold__" `shouldParseTo` (DocBold (DocString "bold")) - it "Emphasis" do - "/emphasis/" `shouldParseTo` (DocEmphasis (DocString "emphasis")) + describe "Parser" do + it "Bold" do + "__bold__" `shouldParseTo` (DocBold (DocString "bold")) + it "Emphasis" do + "/emphasis/" `shouldParseTo` (DocEmphasis (DocString "emphasis")) ------------ -- Tests @@ -44,137 +44,137 @@ main = hspec $ do modules :: Expectation modules = do - "\"MyModule.Name\"" - `shouldLexTo` [ (1, 2, Module "MyModule.Name") - ] + "\"MyModule.Name\"" + `shouldLexTo` [ (1, 2, Module "MyModule.Name") + ] - "\"OtherModule.Name#myAnchor\"" - `shouldLexTo` [ (1, 2, Module "OtherModule.Name") - , (1, 18, Anchor "myAnchor") - ] + "\"OtherModule.Name#myAnchor\"" + `shouldLexTo` [ (1, 2, Module "OtherModule.Name") + , (1, 18, Anchor "myAnchor") + ] - "\"OtherModule.Name\\#myAnchor\"" - `shouldLexTo` [ (1, 2, Module "OtherModule.Name") - , (1, 18, Anchor "myAnchor") - ] + "\"OtherModule.Name\\#myAnchor\"" + `shouldLexTo` [ (1, 2, Module "OtherModule.Name") + , (1, 18, Anchor "myAnchor") + ] link :: Expectation link = - "[link to](http://some.website)" - `shouldLexTo` [ (1, 1, LinkOpen) - , (1, 1, BracketOpen) - , (1, 2, Token "link to") - , (1, 9, BracketClose) - , (1, 10, ParenOpen) - , (1, 11, Token "http://some.website") - , (1, 30, ParenClose) - , (1, 30, LinkClose) - ] + "[link to](http://some.website)" + `shouldLexTo` [ (1, 1, LinkOpen) + , (1, 1, BracketOpen) + , (1, 2, Token "link to") + , (1, 9, BracketClose) + , (1, 10, ParenOpen) + , (1, 11, Token "http://some.website") + , (1, 30, ParenClose) + , (1, 30, LinkClose) + ] labeledLink :: Expectation labeledLink = - "<[link here](http://to.here) label>" - `shouldLexTo` [ (1, 1, LabeledLinkOpen) - , (1, 2, BracketOpen) - , (1, 3, Token "link here") - , (1, 12, BracketClose) - , (1, 13, ParenOpen) - , (1, 14, Token "http://to.here") - , (1, 28, ParenClose) - , (1, 29, Token " label") - , (1, 35, LabeledLinkClose) - ] + "<[link here](http://to.here) label>" + `shouldLexTo` [ (1, 1, LabeledLinkOpen) + , (1, 2, BracketOpen) + , (1, 3, Token "link here") + , (1, 12, BracketClose) + , (1, 13, ParenOpen) + , (1, 14, Token "http://to.here") + , (1, 28, ParenClose) + , (1, 29, Token " label") + , (1, 35, LabeledLinkClose) + ] anchor :: Expectation anchor = - "#myAnchor#" - `shouldLexTo` [ (1, 1, Anchor "myAnchor") - ] + "#myAnchor#" + `shouldLexTo` [ (1, 1, Anchor "myAnchor") + ] math :: IO () math = do - "\\[some math\\]" - `shouldLexTo` [ (1, 1, MathMultilineOpen) - , (1, 3, Token "some math") - , (1, 12, MathMultilineClose) - ] - "\\(other maths\\)" - `shouldLexTo` [ (1, 1, MathInlineOpen) - , (1, 3, Token "other maths") - , (1, 14, MathInlineClose) - ] + "\\[some math\\]" + `shouldLexTo` [ (1, 1, MathMultilineOpen) + , (1, 3, Token "some math") + , (1, 12, MathMultilineClose) + ] + "\\(other maths\\)" + `shouldLexTo` [ (1, 1, MathInlineOpen) + , (1, 3, Token "other maths") + , (1, 14, MathInlineClose) + ] escaping :: Expectation escaping = do - "\\(" - `shouldLexTo` [ (1, 1, Escape) - , (1, 2, Token "(") - ] - "\\(\r\n" - `shouldLexTo` [ (1, 1, Escape) - , (1, 2, Token "(") - ] + "\\(" + `shouldLexTo` [ (1, 1, Escape) + , (1, 2, Token "(") + ] + "\\(\r\n" + `shouldLexTo` [ (1, 1, Escape) + , (1, 2, Token "(") + ] unicode :: Expectation unicode = - "ドラゴンクエストの冒険者🐉" - `shouldLexTo` [ (1, 1, Token "ドラゴンクエストの冒険者🐉") - ] + "ドラゴンクエストの冒険者🐉" + `shouldLexTo` [ (1, 1, Token "ドラゴンクエストの冒険者🐉") + ] ignoreNesting :: Expectation ignoreNesting = - ">/foo/" - `shouldLexTo` [ (1, 1, Token ">/foo/") - ] + ">/foo/" + `shouldLexTo` [ (1, 1, Token ">/foo/") + ] birdTracks :: Expectation birdTracks = - ">> code" - `shouldLexTo` [ (1, 1, BirdTrack) - , (1, 4, Token "code") - ] + ">> code" + `shouldLexTo` [ (1, 1, BirdTrack) + , (1, 4, Token "code") + ] quotes :: Expectation quotes = - "\"quoted\"" - `shouldLexTo` [ (1, 1, QuoteOpen) - , (1, 2, Token "quoted") - , (1, 8, QuoteClose) - ] + "\"quoted\"" + `shouldLexTo` [ (1, 1, QuoteOpen) + , (1, 2, Token "quoted") + , (1, 8, QuoteClose) + ] space :: Expectation space = do - "\n " - `shouldLexTo` [ (1, 1, Newline) - , (2, 1, Space) - ] - " \n" - `shouldLexTo` [ (1, 1, Space) - , (1, 2, Newline) - ] + "\n " + `shouldLexTo` [ (1, 1, Newline) + , (2, 1, Space) + ] + " \n" + `shouldLexTo` [ (1, 1, Space) + , (1, 2, Newline) + ] monospace :: Expectation monospace = - "@mono@" - `shouldLexTo` [ (1, 1, MonospaceOpen) - , (1, 2, Token "mono") - , (1, 6, MonospaceClose) - ] + "@mono@" + `shouldLexTo` [ (1, 1, MonospaceOpen) + , (1, 2, Token "mono") + , (1, 6, MonospaceClose) + ] emphatic :: Expectation emphatic = - "/emphatic/" - `shouldLexTo` [ (1, 1, EmphasisOpen) - , (1, 2, Token "emphatic") - , (1, 10, EmphasisClose) - ] + "/emphatic/" + `shouldLexTo` [ (1, 1, EmphasisOpen) + , (1, 2, Token "emphatic") + , (1, 10, EmphasisClose) + ] someString :: Expectation someString = - "some string" - `shouldLexTo` [ (1, 1, Token "some") - , (1, 5, Space) - , (1, 6, Token "string") - ] + "some string" + `shouldLexTo` [ (1, 1, Token "some") + , (1, 5, Space) + , (1, 6, Token "string") + ] -------------- -- Helpers @@ -183,16 +183,16 @@ someString = type Doc id = DocMarkup () id instance IsString (Doc String) where - fromString = DocString + fromString = DocString shouldLexTo :: String -> [(Int, Int, Token)] -> Expectation shouldLexTo input expected = - withFrozenCallStack $ + withFrozenCallStack $ case lexer input of - Right tokens -> do - let actual = map (\(pos, tok) -> (sourceLine pos, sourceColumn pos, tok)) tokens - actual `shouldBe` expected - Left err -> expectationFailure $ "Parse error: " <> show err + Right tokens -> do + let actual = map (\(pos, tok) -> (sourceLine pos, sourceColumn pos, tok)) tokens + actual `shouldBe` expected + Left err -> expectationFailure $ "Parse error: " <> show err shouldParseTo :: Text -> DocMarkup mod Identifier -> Expectation shouldParseTo input ast = parseText input `shouldBe` ast