From 39cfe2035d8af4ec1edecccb2974d778dedc79b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9ana=20=E6=B1=9F?= Date: Wed, 24 Sep 2025 11:01:52 +0000 Subject: [PATCH 1/8] test: freeze the callstack for better failure messages (#1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This would make the function `shouldLexTo` be transparent in the error reporting! Before all errors would point to this line. Reviewed-on: https://git.elland.me/elland/haddock2/pulls/1 Co-authored-by: Léana 江 Co-committed-by: Léana 江 --- test/Spec.hs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/Spec.hs b/test/Spec.hs index d202ac2..0a7653d 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -11,6 +11,7 @@ import Types import Data.String (IsString (..)) import Data.Text (Text) import Text.Parsec.Pos +import GHC.Stack main :: IO () main = hspec $ do @@ -185,6 +186,7 @@ instance IsString (Doc String) where shouldLexTo :: String -> [(Int, Int, Token)] -> Expectation shouldLexTo input expected = + withFrozenCallStack $ case lexer input of Right tokens -> do let actual = map (\(pos, tok) -> (sourceLine pos, sourceColumn pos, tok)) tokens From 6064afd0b9a9db96d781f83d72a733199b143b71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9ana=20=E6=B1=9F?= Date: Fri, 26 Sep 2025 14:44:46 +0000 Subject: [PATCH 2/8] refactor lexer (#2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reduce manual usage of getPosition and setting it and improve some helper functions. Reviewed-on: https://git.elland.me/elland/haddock2/pulls/2 Reviewed-by: elland Co-authored-by: Léana 江 Co-committed-by: Léana 江 --- Grammar.ebnf | 2 +- src/Lexer.hs | 160 +++++++++++++++++++++++---------------------------- test/Spec.hs | 35 +++++------ 3 files changed, 91 insertions(+), 106 deletions(-) diff --git a/Grammar.ebnf b/Grammar.ebnf index bff331d..2404a67 100644 --- a/Grammar.ebnf +++ b/Grammar.ebnf @@ -12,7 +12,7 @@ bold ::= '__' text_no_newline '__' monospace ::= '@' text_content '@' link ::= module_link | hyperlink | markdown_link -module_link ::= '"' module_name ( '#' anchor_name )? '"' +module_link ::= '"' module_name ( ('#' | '\#') anchor_name )? '"' hyperlink ::= '<' url ( ' ' link_text )? '>' markdown_link ::= '[' link_text '](' ( url | module_link ) ')' diff --git a/src/Lexer.hs b/src/Lexer.hs index e12324b..426a7ff 100644 --- a/src/Lexer.hs +++ b/src/Lexer.hs @@ -10,12 +10,13 @@ import Control.Monad (mfilter, void) import Data.Functor (($>)) import Data.Text (Text, intercalate) import Data.Text qualified as Text -import GHC.Unicode (isAlpha, isAlphaNum, isControl, isPrint, isSpace, isUpper) +import GHC.Unicode (isAlphaNum, isControl, isPrint, isSpace, isUpper) import ParserMonad (Parser, initialParserState) import Text.Parsec import Text.Parsec qualified as Parsec import Text.Parsec.Pos (updatePosChar) +type Located a = (SourcePos, a) type LocatedToken = (SourcePos, Token) type Lexer = Parser [LocatedToken] @@ -50,12 +51,12 @@ data Token | ParenClose | BracketOpen | BracketClose - | MathsParenOpen - | MathsParenClose - | MathsBracketOpen - | MathsBracketClose + | MathInlineOpen + | MathInlineClose + | MathMultilineOpen + | MathMultilineClose | NumericEntity Int - | Module + | Module Text | QuoteOpen | QuoteClose | Space @@ -65,9 +66,6 @@ data Token located :: Parser a -> Parser (SourcePos, a) located p = (,) <$> getPosition <*> p -startPosition :: Parser a -> Parser SourcePos -startPosition = fmap fst . located - tokenise :: [Parser a] -> Parser [(SourcePos, a)] tokenise = sequence . map located @@ -84,16 +82,16 @@ lexText = go toks <- choice $ Parsec.try - <$> [ mathsBracket - , mathsParens + <$> [ mathMultiline + , mathInline , escape -- maths go before escape to avoid mismatch , headers , newlineToken , spaceToken , link , labeledLink - , modules - , anchors + , module_ + , anchor , textElement , quotes , birdTrack @@ -125,90 +123,84 @@ headers = , header6 ] -delimitedMaybe :: Parser a -> Parser a -> Token -> Maybe Token -> Parser [LocatedToken] -delimitedMaybe openMark closeMark openToken closeToken = do - openPos <- getPosition - void openMark - tokenPos <- getPosition - content <- anyUntil closeMark - closePos <- getPosition - void closeMark - - let openTok :: LocatedToken = (openPos, openToken) - res :: LocatedToken = (tokenPos, Token content) - closeToks :: [LocatedToken] = case closeToken of - Just close -> [(closePos, close)] - Nothing -> [] - - pure $ [openTok, res] <> closeToks - anyUntil :: Parser a -> Parser Text anyUntil p = Text.pack <$> manyTill anyChar (lookAhead p) -delimited :: Parser a -> Parser a -> Token -> Token -> Parser [LocatedToken] -delimited a b c d = delimitedMaybe a b c (Just d) +delimitedAsTuple :: Parser open -> Parser close -> Parser (Located open, LocatedToken, Located close) +delimitedAsTuple openP closeP = + (,,) + <$> located openP + <*> located (Token <$> anyUntil closeP) + <*> located closeP + +delimited :: Parser open -> Parser close -> Token -> Token -> Parser [LocatedToken] +delimited openP closeP openTok closeTok = asList <$> delimitedAsTuple (openTok <$ openP) (closeTok <$ closeP) + where + asList (a, tok, b) = [a, tok, b] + +delimitedNoTrailing :: Parser open -> Parser close -> Token -> Parser [LocatedToken] +delimitedNoTrailing openP closeP openTok = asList <$> delimitedAsTuple (openTok <$ openP) (void closeP) + where + asList (a, tok, _) = [a, tok] delimitedSymmetric :: Parser a -> Token -> Token -> Parser [LocatedToken] delimitedSymmetric s t1 t2 = delimited s s t1 t2 eol :: Parser () -eol = void "\n" <|> Parsec.eof +eol = void "\n" <|> void "\r\n" <|> Parsec.eof header1 :: Lexer -header1 = delimitedMaybe (void $ "= ") eol (Header One) Nothing +header1 = delimitedNoTrailing "= " eol (Header One) header2 :: Lexer -header2 = delimitedMaybe (void $ "== ") eol (Header Two) Nothing +header2 = delimitedNoTrailing "== " eol (Header Two) header3 :: Lexer -header3 = delimitedMaybe (void $ "=== ") eol (Header Three) Nothing +header3 = delimitedNoTrailing "=== " eol (Header Three) header4 :: Lexer -header4 = delimitedMaybe (void $ "==== ") eol (Header Four) Nothing +header4 = delimitedNoTrailing "==== " eol (Header Four) header5 :: Lexer -header5 = delimitedMaybe (void $ "===== ") eol (Header Five) Nothing +header5 = delimitedNoTrailing "===== " eol (Header Five) header6 :: Lexer -header6 = delimitedMaybe (void $ "====== ") eol (Header Six) Nothing +header6 = delimitedNoTrailing "====== " eol (Header Six) -- #anchors# -anchors :: Lexer -anchors = do - pos <- getPosition - void $ try anchor' - txt <- anyUntil anchor' - void $ try anchor' +anchor :: Lexer +anchor = do + x <- located $ between "#" "#" (Anchor <$> anyUntil "#") + pure [x] - pure [(pos, Anchor txt)] - where - anchor' = (string "#" <|> string "\\#") + +moduleNames :: Parser Text +moduleNames = intercalate "." . fmap Text.pack <$> upperId `sepBy1` char '.' + +upperId :: Parser String +upperId = (:) <$> satisfy isUpper <*> many1 identifierChar + +identifierChar :: Parser Char +identifierChar = satisfy (\c -> isAlphaNum c || c == '_') -- "Module.Name" -- "Module.Name#anchor" --- "Module.Name\#anchor" -- this has been deprecated for 9 years, thanks Ben -modules :: Lexer -modules = do - startPos <- startPosition $ char '"' - (modPos, modName) <- located modId - anch <- option [] do - anchPos <- startPosition (string "#" <|> string' "\\#") - txt <- Text.pack <$> many (satisfy (\c -> c /= '"' && not (isSpace c))) - pure [(anchPos, Anchor txt)] - - void $ char '"' - pure $ [(startPos, Module), (modPos, Token modName)] <> anch +-- "Module.Name\#anchor" -- known as "old anchor". this has been deprecated for 9 years, thanks Ben +module_ :: Lexer +module_ = between (char '"') (char '"') inner where - modId = intercalate "." <$> (fmap Text.pack <$> (conId `sepBy1` (char '.'))) + inner = do + m <- located $ Module <$> moduleNames + mAnchor <- optionMaybe (located $ anchorHash *> (Anchor <$> anchorText)) + pure $ case mAnchor of + Just anc -> [m, anc] + Nothing -> [m] - conId :: Parser String - conId = - (:) - <$> satisfy (\c -> isAlpha c && isUpper c) - <*> many1 conChar + anchorHash :: Parser Text + anchorHash = "#" <|> try "\\#" - conChar :: Parser Char - conChar = satisfy (\c -> isAlphaNum c || c == '_') + anchorText :: Parser Text + anchorText = Text.pack <$> many (satisfy (\c -> c /= '"' && not (isSpace c))) linkRaw :: Lexer linkRaw = @@ -225,38 +217,30 @@ link :: Lexer link = do pos <- getPosition l <- linkRaw - -- "unconsume" the last token + -- register the position of the last token pos' <- flip incSourceColumn (-1) <$> getPosition pure $ (pos, LinkOpen) : l <> [(pos', LinkClose)] labeledLink :: Lexer labeledLink = do - pos <- getPosition - void $ string "<" - link' <- linkRaw - pos7 <- getPosition - label' <- anyUntil $ string ">" - pos8 <- getPosition - void $ ">" - + open <- located $ LabeledLinkOpen <$ "<" + linkRes <- linkRaw + labelRes <- located $ Token <$> anyUntil ">" + close <- located $ LabeledLinkClose <$ ">" pure $ - (pos, LabeledLinkOpen) - : link' - <> [ (pos7, Token label') - , (pos8, LabeledLinkClose) - ] + open : linkRes <> [ labelRes , close ] -mathsBracket :: Lexer -mathsBracket = delimited (void $ "\\[") (void "\\]") MathsBracketOpen MathsBracketClose +mathMultiline :: Lexer +mathMultiline = delimited "\\[" "\\]" MathMultilineOpen MathMultilineClose -mathsParens :: Lexer -mathsParens = delimited (void $ "\\(") (void "\\)") MathsParenOpen MathsParenClose +mathInline :: Lexer +mathInline = delimited "\\(" "\\)" MathInlineOpen MathInlineClose birdTrack :: Lexer -birdTrack = delimitedMaybe (void ">> ") eol BirdTrack Nothing +birdTrack = delimitedNoTrailing ">> " eol BirdTrack escape :: Lexer -escape = delimitedMaybe (void "\\") eol Escape Nothing +escape = delimitedNoTrailing "\\" eol Escape quotes :: Lexer quotes = delimitedSymmetric "\"" QuoteOpen QuoteClose diff --git a/test/Spec.hs b/test/Spec.hs index 0a7653d..745aefa 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -19,8 +19,8 @@ main = hspec $ do describe "minimal" do it "handles unicode" unicode it "escapes" escaping - it "maths" maths - it "anchors" anchors + it "maths" math + it "anchors" anchor it "space chars" space it "bare string" someString it "emphasis" emphatic @@ -45,19 +45,16 @@ main = hspec $ do modules :: Expectation modules = do "\"MyModule.Name\"" - `shouldLexTo` [ (1, 1, Module) - , (1, 2, Token "MyModule.Name") + `shouldLexTo` [ (1, 2, Module "MyModule.Name") ] "\"OtherModule.Name#myAnchor\"" - `shouldLexTo` [ (1, 1, Module) - , (1, 2, Token "OtherModule.Name") + `shouldLexTo` [ (1, 2, Module "OtherModule.Name") , (1, 18, Anchor "myAnchor") ] "\"OtherModule.Name\\#myAnchor\"" - `shouldLexTo` [ (1, 1, Module) - , (1, 2, Token "OtherModule.Name") + `shouldLexTo` [ (1, 2, Module "OtherModule.Name") , (1, 18, Anchor "myAnchor") ] link :: Expectation @@ -87,31 +84,35 @@ labeledLink = , (1, 35, LabeledLinkClose) ] -anchors :: Expectation -anchors = +anchor :: Expectation +anchor = "#myAnchor#" `shouldLexTo` [ (1, 1, Anchor "myAnchor") ] -maths :: IO () -maths = do +math :: IO () +math = do "\\[some math\\]" - `shouldLexTo` [ (1, 1, MathsBracketOpen) + `shouldLexTo` [ (1, 1, MathMultilineOpen) , (1, 3, Token "some math") - , (1, 12, MathsBracketClose) + , (1, 12, MathMultilineClose) ] "\\(other maths\\)" - `shouldLexTo` [ (1, 1, MathsParenOpen) + `shouldLexTo` [ (1, 1, MathInlineOpen) , (1, 3, Token "other maths") - , (1, 14, MathsParenClose) + , (1, 14, MathInlineClose) ] escaping :: Expectation -escaping = +escaping = do "\\(" `shouldLexTo` [ (1, 1, Escape) , (1, 2, Token "(") ] + "\\(\r\n" + `shouldLexTo` [ (1, 1, Escape) + , (1, 2, Token "(") + ] unicode :: Expectation unicode = From f4912d3339dad16dd29ffc6cd15023a94d14fb96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9ana=20=E6=B1=9F?= Date: Fri, 26 Sep 2025 14:52:34 +0000 Subject: [PATCH 3/8] Format with fourmolu (#3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the formatting config (similar to default but with indent 2) Reviewed-on: https://git.elland.me/elland/haddock2/pulls/3 Co-authored-by: Léana 江 Co-committed-by: Léana 江 --- .git-blame-ignore-revs | 2 ++ fourmolu.yaml | 72 ++++++++++++++++++++++++++++++++++++++++++ src/Types.hs | 4 +-- 3 files changed, 76 insertions(+), 2 deletions(-) create mode 100644 .git-blame-ignore-revs create mode 100644 fourmolu.yaml diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs new file mode 100644 index 0000000..f70a7a2 --- /dev/null +++ b/.git-blame-ignore-revs @@ -0,0 +1,2 @@ +# Fourmolu +9998ac92263127b05fd1eb607f3b7740c69d3a58 diff --git a/fourmolu.yaml b/fourmolu.yaml new file mode 100644 index 0000000..acded78 --- /dev/null +++ b/fourmolu.yaml @@ -0,0 +1,72 @@ +# Number of spaces per indentation step +indentation: 2 + +# Max line length for automatic line breaking +column-limit: none + +# Styling of arrows in type signatures (choices: trailing, leading, or leading-args) +function-arrows: trailing + +# How to place commas in multi-line lists, records, etc. (choices: leading or trailing) +comma-style: leading + +# Styling of import/export lists (choices: leading, trailing, or diff-friendly) +import-export-style: diff-friendly + +# Rules for grouping import declarations +import-grouping: legacy + +# Whether to full-indent or half-indent 'where' bindings past the preceding body +indent-wheres: false + +# Whether to leave a space before an opening record brace +record-brace-space: false + +# Number of spaces between top-level declarations +newlines-between-decls: 1 + +# How to print Haddock comments (choices: single-line, multi-line, or multi-line-compact) +haddock-style: multi-line + +# How to print module docstring +haddock-style-module: null + +# Styling of let blocks (choices: auto, inline, newline, or mixed) +let-style: auto + +# How to align the 'in' keyword with respect to the 'let' keyword (choices: left-align, right-align, or no-space) +in-style: right-align + +# Whether to put parentheses around a single constraint (choices: auto, always, or never) +single-constraint-parens: always + +# Whether to put parentheses around a single deriving class (choices: auto, always, or never) +single-deriving-parens: always + +# Whether to sort constraints +sort-constraints: false + +# Whether to sort derived classes +sort-derived-classes: false + +# Whether to sort deriving clauses +sort-deriving-clauses: false + +# Whether to place section operators (those that are infixr 0, such as $) in trailing position, continuing the expression indented below +trailing-section-operators: true + +# Output Unicode syntax (choices: detect, always, or never) +unicode: never + +# Give the programmer more choice on where to insert blank lines +respectful: true + +# Fixity information for operators +fixities: [] + +# Module reexports Fourmolu should know about +reexports: [] + +# Modules defined by the current Cabal package for import grouping +local-modules: [] + diff --git a/src/Types.hs b/src/Types.hs index 615c401..a41e38b 100644 --- a/src/Types.hs +++ b/src/Types.hs @@ -105,10 +105,10 @@ data DocMarkup mod id -} DocMathInline String | {- | Math multiline display - \[ + \[ mathematical expression in multiple lines - \] + \] -} DocMathDisplay String | {- | Anchors, no spaces allowed From d8ba47a8b65c78c79ba65b1b08dbc46ee151559d Mon Sep 17 00:00:00 2001 From: Igor Ranieri Date: Sat, 27 Sep 2025 07:51:09 +0000 Subject: [PATCH 4/8] Applied more formatting; added Makefile (#4) Reviewed-on: https://git.elland.me/elland/haddock2/pulls/4 Co-authored-by: Igor Ranieri Co-committed-by: Igor Ranieri --- Makefile | 9 ++ haddock2.cabal | 2 +- src/Lexer.hs | 310 +++++++++++++++++++++++++------------------------ test/Spec.hs | 238 ++++++++++++++++++------------------- 4 files changed, 285 insertions(+), 274 deletions(-) create mode 100644 Makefile diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..62cad36 --- /dev/null +++ b/Makefile @@ -0,0 +1,9 @@ +.PHONY: help +help: ## Show this help. + @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' + +.PHONY: format +format: + find . -type f -name "*.hs" ! -path "./dist-newstyle/*" -exec fourmolu -i {} + + cabal-gild --io=haddock2.cabal + diff --git a/haddock2.cabal b/haddock2.cabal index 7fe124c..9ae122d 100644 --- a/haddock2.cabal +++ b/haddock2.cabal @@ -46,10 +46,10 @@ test-suite haddock2-test type: exitcode-stdio-1.0 main-is: Spec.hs build-depends: - parsec ^>=3.1.18.0, base >=4.20.1.0, haddock2:{haddock2-lib}, hspec ^>=2.11.0, + parsec ^>=3.1.18.0, text ^>=2.1.2, hs-source-dirs: test diff --git a/src/Lexer.hs b/src/Lexer.hs index 426a7ff..4a85fb5 100644 --- a/src/Lexer.hs +++ b/src/Lexer.hs @@ -1,10 +1,11 @@ {-# LANGUAGE OverloadedStrings #-} module Lexer ( - Token (..), - lexer, - emphasis, -) where + Token (..), + lexer, + emphasis, +) +where import Control.Monad (mfilter, void) import Data.Functor (($>)) @@ -17,51 +18,52 @@ import Text.Parsec qualified as Parsec import Text.Parsec.Pos (updatePosChar) type Located a = (SourcePos, a) + type LocatedToken = (SourcePos, Token) type Lexer = Parser [LocatedToken] data Level - = One - | Two - | Three - | Four - | Five - | Six - deriving (Eq, Show) + = One + | Two + | Three + | Four + | Five + | Six + deriving (Eq, Show) data Token - = Token Text - | Anchor Text - | BirdTrack - | BoldOpen - | BoldClose - | Escape - | EmphasisOpen - | EmphasisClose - | Header Level - | MonospaceOpen - | MonospaceClose - | Newline - | LinkOpen - | LinkClose - | LabeledLinkOpen - | LabeledLinkClose - | ParenOpen - | ParenClose - | BracketOpen - | BracketClose - | MathInlineOpen - | MathInlineClose - | MathMultilineOpen - | MathMultilineClose - | NumericEntity Int - | Module Text - | QuoteOpen - | QuoteClose - | Space - | EOF - deriving (Eq, Show) + = Token Text + | Anchor Text + | BirdTrack + | BoldOpen + | BoldClose + | Escape + | EmphasisOpen + | EmphasisClose + | Header Level + | MonospaceOpen + | MonospaceClose + | Newline + | LinkOpen + | LinkClose + | LabeledLinkOpen + | LabeledLinkClose + | ParenOpen + | ParenClose + | BracketOpen + | BracketClose + | MathInlineOpen + | MathInlineClose + | MathMultilineOpen + | MathMultilineClose + | NumericEntity Int + | Module Text + | QuoteOpen + | QuoteClose + | Space + | EOF + deriving (Eq, Show) located :: Parser a -> Parser (SourcePos, a) located p = (,) <$> getPosition <*> p @@ -74,74 +76,74 @@ lexer = Parsec.runParser lexText initialParserState "input" . Text.pack lexText :: Parser [LocatedToken] lexText = go - where - go = do - Parsec.optionMaybe Parsec.eof >>= \case - Just _ -> pure [] - Nothing -> do - toks <- - choice $ - Parsec.try - <$> [ mathMultiline - , mathInline - , escape -- maths go before escape to avoid mismatch - , headers - , newlineToken - , spaceToken - , link - , labeledLink - , module_ - , anchor - , textElement - , quotes - , birdTrack - , other - ] - rest <- go - pure (toks <> rest) + where + go = do + Parsec.optionMaybe Parsec.eof >>= \case + Just _ -> pure [] + Nothing -> do + toks <- + choice $ + Parsec.try + <$> [ mathMultiline + , mathInline + , escape -- maths go before escape to avoid mismatch + , headers + , newlineToken + , spaceToken + , link + , labeledLink + , module_ + , anchor + , textElement + , quotes + , birdTrack + , other + ] + rest <- go + pure (toks <> rest) -- Tokens textElement :: Parser [LocatedToken] textElement = - choice $ - Parsec.try - <$> [ emphasis - , bold - , monospace - ] + choice $ + Parsec.try + <$> [ emphasis + , bold + , monospace + ] headers :: Parser [LocatedToken] headers = - choice $ - Parsec.try - <$> [ header1 - , header2 - , header3 - , header4 - , header5 - , header6 - ] + choice $ + Parsec.try + <$> [ header1 + , header2 + , header3 + , header4 + , header5 + , header6 + ] anyUntil :: Parser a -> Parser Text anyUntil p = Text.pack <$> manyTill anyChar (lookAhead p) delimitedAsTuple :: Parser open -> Parser close -> Parser (Located open, LocatedToken, Located close) delimitedAsTuple openP closeP = - (,,) - <$> located openP - <*> located (Token <$> anyUntil closeP) - <*> located closeP + (,,) + <$> located openP + <*> located (Token <$> anyUntil closeP) + <*> located closeP delimited :: Parser open -> Parser close -> Token -> Token -> Parser [LocatedToken] delimited openP closeP openTok closeTok = asList <$> delimitedAsTuple (openTok <$ openP) (closeTok <$ closeP) - where - asList (a, tok, b) = [a, tok, b] + where + asList (a, tok, b) = [a, tok, b] delimitedNoTrailing :: Parser open -> Parser close -> Token -> Parser [LocatedToken] delimitedNoTrailing openP closeP openTok = asList <$> delimitedAsTuple (openTok <$ openP) (void closeP) - where - asList (a, tok, _) = [a, tok] + where + asList (a, tok, _) = [a, tok] delimitedSymmetric :: Parser a -> Token -> Token -> Parser [LocatedToken] delimitedSymmetric s t1 t2 = delimited s s t1 t2 @@ -170,9 +172,8 @@ header6 = delimitedNoTrailing "====== " eol (Header Six) -- #anchors# anchor :: Lexer anchor = do - x <- located $ between "#" "#" (Anchor <$> anyUntil "#") - pure [x] - + x <- located $ between "#" "#" (Anchor <$> anyUntil "#") + pure [x] moduleNames :: Parser Text moduleNames = intercalate "." . fmap Text.pack <$> upperId `sepBy1` char '.' @@ -188,47 +189,47 @@ identifierChar = satisfy (\c -> isAlphaNum c || c == '_') -- "Module.Name\#anchor" -- known as "old anchor". this has been deprecated for 9 years, thanks Ben module_ :: Lexer module_ = between (char '"') (char '"') inner - where - inner = do - m <- located $ Module <$> moduleNames - mAnchor <- optionMaybe (located $ anchorHash *> (Anchor <$> anchorText)) - pure $ case mAnchor of - Just anc -> [m, anc] - Nothing -> [m] + where + inner = do + m <- located $ Module <$> moduleNames + mAnchor <- optionMaybe (located $ anchorHash *> (Anchor <$> anchorText)) + pure $ case mAnchor of + Just anc -> [m, anc] + Nothing -> [m] - anchorHash :: Parser Text - anchorHash = "#" <|> try "\\#" + anchorHash :: Parser Text + anchorHash = "#" <|> try "\\#" - anchorText :: Parser Text - anchorText = Text.pack <$> many (satisfy (\c -> c /= '"' && not (isSpace c))) + anchorText :: Parser Text + anchorText = Text.pack <$> many (satisfy (\c -> c /= '"' && not (isSpace c))) linkRaw :: Lexer linkRaw = - tokenise - [ BracketOpen <$ "[" - , Token <$> anyUntil "]" - , BracketClose <$ "]" - , ParenOpen <$ "(" - , Token <$> anyUntil ")" - , ParenClose <$ ")" - ] + tokenise + [ BracketOpen <$ "[" + , Token <$> anyUntil "]" + , BracketClose <$ "]" + , ParenOpen <$ "(" + , Token <$> anyUntil ")" + , ParenClose <$ ")" + ] link :: Lexer link = do - pos <- getPosition - l <- linkRaw - -- register the position of the last token - pos' <- flip incSourceColumn (-1) <$> getPosition - pure $ (pos, LinkOpen) : l <> [(pos', LinkClose)] + pos <- getPosition + l <- linkRaw + -- register the position of the last token + pos' <- flip incSourceColumn (-1) <$> getPosition + pure $ (pos, LinkOpen) : l <> [(pos', LinkClose)] labeledLink :: Lexer labeledLink = do - open <- located $ LabeledLinkOpen <$ "<" - linkRes <- linkRaw - labelRes <- located $ Token <$> anyUntil ">" - close <- located $ LabeledLinkClose <$ ">" - pure $ - open : linkRes <> [ labelRes , close ] + open <- located $ LabeledLinkOpen <$ "<" + linkRes <- linkRaw + labelRes <- located $ Token <$> anyUntil ">" + close <- located $ LabeledLinkClose <$ ">" + pure $ + open : linkRes <> [labelRes, close] mathMultiline :: Lexer mathMultiline = delimited "\\[" "\\]" MathMultilineOpen MathMultilineClose @@ -256,23 +257,23 @@ monospace = delimitedSymmetric "@" MonospaceOpen MonospaceClose other :: Lexer other = do - pos <- getPosition - c <- takeWhile1_ isUnicodeAlphaNum - pure . pure $ (pos, Token c) - where - isUnicodeAlphaNum c = isPrint c && not (isControl c) && not (isSpace c) + pos <- getPosition + c <- takeWhile1_ isUnicodeAlphaNum + pure . pure $ (pos, Token c) + where + isUnicodeAlphaNum c = isPrint c && not (isControl c) && not (isSpace c) spaceToken :: Lexer spaceToken = do - pos <- getPosition - _ <- many1 (char ' ') - pure . pure $ (pos, Space) + pos <- getPosition + _ <- many1 (char ' ') + pure . pure $ (pos, Space) newlineToken :: Lexer newlineToken = do - pos <- getPosition - _ <- newline - pure . pure $ (pos, Newline) + pos <- getPosition + _ <- newline + pure . pure $ (pos, Newline) ------- -- Helpers @@ -281,11 +282,11 @@ newlineToken = do -- | Like `takeWhile`, but unconditionally take escaped characters. takeWhile_ :: (Char -> Bool) -> Parser Text takeWhile_ p = scan p_ False - where - p_ escaped c - | escaped = Just False - | not $ p c = Nothing - | otherwise = Just (c == '\\') + where + p_ escaped c + | escaped = Just False + | not $ p c = Nothing + | otherwise = Just (c == '\\') -- | Like 'takeWhile1', but unconditionally take escaped characters. takeWhile1_ :: (Char -> Bool) -> Parser Text @@ -295,19 +296,20 @@ takeWhile1_ = mfilter (not . Text.null) . takeWhile_ function returns true. -} scan :: - -- | scan function - (state -> Char -> Maybe state) -> - -- | initial state - state -> - Parser Text + -- | scan function + (state -> Char -> Maybe state) -> + -- | initial state + state -> + Parser Text scan f initState = do - parserState@State{stateInput = input, statePos = pos} <- Parsec.getParserState - (remaining, finalPos, ct) <- go input initState pos 0 - let newState = parserState{stateInput = remaining, statePos = finalPos} - Parsec.setParserState newState $> Text.take ct input - where - go !input' !st !posAccum !count' = case Text.uncons input' of - Nothing -> pure (input', posAccum, count') - Just (char', input'') -> case f st char' of - Nothing -> pure (input', posAccum, count') - Just st' -> go input'' st' (updatePosChar posAccum char') (count' + 1) + parserState@State{stateInput = input, statePos = pos} <- Parsec.getParserState + + (remaining, finalPos, ct) <- go input initState pos 0 + let newState = parserState{stateInput = remaining, statePos = finalPos} + Parsec.setParserState newState $> Text.take ct input + where + go !input' !st !posAccum !count' = case Text.uncons input' of + Nothing -> pure (input', posAccum, count') + Just (char', input'') -> case f st char' of + Nothing -> pure (input', posAccum, count') + Just st' -> go input'' st' (updatePosChar posAccum char') (count' + 1) diff --git a/test/Spec.hs b/test/Spec.hs index 745aefa..7258a2d 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -10,33 +10,33 @@ import Types import Data.String (IsString (..)) import Data.Text (Text) -import Text.Parsec.Pos import GHC.Stack +import Text.Parsec.Pos main :: IO () main = hspec $ do - describe "Lexer" do - describe "minimal" do - it "handles unicode" unicode - it "escapes" escaping - it "maths" math - it "anchors" anchor - it "space chars" space - it "bare string" someString - it "emphasis" emphatic - it "monospace" monospace - it "labeled link" labeledLink - it "markdown link" link - it "bird tracks" birdTracks - it "module names" modules - it "quotes" quotes - it "ignores nesting" ignoreNesting + describe "Lexer" do + describe "minimal" do + it "handles unicode" unicode + it "escapes" escaping + it "maths" math + it "anchors" anchor + it "space chars" space + it "bare string" someString + it "emphasis" emphatic + it "monospace" monospace + it "labeled link" labeledLink + it "markdown link" link + it "bird tracks" birdTracks + it "module names" modules + it "quotes" quotes + it "ignores nesting" ignoreNesting - describe "Parser" do - it "Bold" do - "__bold__" `shouldParseTo` (DocBold (DocString "bold")) - it "Emphasis" do - "/emphasis/" `shouldParseTo` (DocEmphasis (DocString "emphasis")) + describe "Parser" do + it "Bold" do + "__bold__" `shouldParseTo` (DocBold (DocString "bold")) + it "Emphasis" do + "/emphasis/" `shouldParseTo` (DocEmphasis (DocString "emphasis")) ------------ -- Tests @@ -44,137 +44,137 @@ main = hspec $ do modules :: Expectation modules = do - "\"MyModule.Name\"" - `shouldLexTo` [ (1, 2, Module "MyModule.Name") - ] + "\"MyModule.Name\"" + `shouldLexTo` [ (1, 2, Module "MyModule.Name") + ] - "\"OtherModule.Name#myAnchor\"" - `shouldLexTo` [ (1, 2, Module "OtherModule.Name") - , (1, 18, Anchor "myAnchor") - ] + "\"OtherModule.Name#myAnchor\"" + `shouldLexTo` [ (1, 2, Module "OtherModule.Name") + , (1, 18, Anchor "myAnchor") + ] - "\"OtherModule.Name\\#myAnchor\"" - `shouldLexTo` [ (1, 2, Module "OtherModule.Name") - , (1, 18, Anchor "myAnchor") - ] + "\"OtherModule.Name\\#myAnchor\"" + `shouldLexTo` [ (1, 2, Module "OtherModule.Name") + , (1, 18, Anchor "myAnchor") + ] link :: Expectation link = - "[link to](http://some.website)" - `shouldLexTo` [ (1, 1, LinkOpen) - , (1, 1, BracketOpen) - , (1, 2, Token "link to") - , (1, 9, BracketClose) - , (1, 10, ParenOpen) - , (1, 11, Token "http://some.website") - , (1, 30, ParenClose) - , (1, 30, LinkClose) - ] + "[link to](http://some.website)" + `shouldLexTo` [ (1, 1, LinkOpen) + , (1, 1, BracketOpen) + , (1, 2, Token "link to") + , (1, 9, BracketClose) + , (1, 10, ParenOpen) + , (1, 11, Token "http://some.website") + , (1, 30, ParenClose) + , (1, 30, LinkClose) + ] labeledLink :: Expectation labeledLink = - "<[link here](http://to.here) label>" - `shouldLexTo` [ (1, 1, LabeledLinkOpen) - , (1, 2, BracketOpen) - , (1, 3, Token "link here") - , (1, 12, BracketClose) - , (1, 13, ParenOpen) - , (1, 14, Token "http://to.here") - , (1, 28, ParenClose) - , (1, 29, Token " label") - , (1, 35, LabeledLinkClose) - ] + "<[link here](http://to.here) label>" + `shouldLexTo` [ (1, 1, LabeledLinkOpen) + , (1, 2, BracketOpen) + , (1, 3, Token "link here") + , (1, 12, BracketClose) + , (1, 13, ParenOpen) + , (1, 14, Token "http://to.here") + , (1, 28, ParenClose) + , (1, 29, Token " label") + , (1, 35, LabeledLinkClose) + ] anchor :: Expectation anchor = - "#myAnchor#" - `shouldLexTo` [ (1, 1, Anchor "myAnchor") - ] + "#myAnchor#" + `shouldLexTo` [ (1, 1, Anchor "myAnchor") + ] math :: IO () math = do - "\\[some math\\]" - `shouldLexTo` [ (1, 1, MathMultilineOpen) - , (1, 3, Token "some math") - , (1, 12, MathMultilineClose) - ] - "\\(other maths\\)" - `shouldLexTo` [ (1, 1, MathInlineOpen) - , (1, 3, Token "other maths") - , (1, 14, MathInlineClose) - ] + "\\[some math\\]" + `shouldLexTo` [ (1, 1, MathMultilineOpen) + , (1, 3, Token "some math") + , (1, 12, MathMultilineClose) + ] + "\\(other maths\\)" + `shouldLexTo` [ (1, 1, MathInlineOpen) + , (1, 3, Token "other maths") + , (1, 14, MathInlineClose) + ] escaping :: Expectation escaping = do - "\\(" - `shouldLexTo` [ (1, 1, Escape) - , (1, 2, Token "(") - ] - "\\(\r\n" - `shouldLexTo` [ (1, 1, Escape) - , (1, 2, Token "(") - ] + "\\(" + `shouldLexTo` [ (1, 1, Escape) + , (1, 2, Token "(") + ] + "\\(\r\n" + `shouldLexTo` [ (1, 1, Escape) + , (1, 2, Token "(") + ] unicode :: Expectation unicode = - "ドラゴンクエストの冒険者🐉" - `shouldLexTo` [ (1, 1, Token "ドラゴンクエストの冒険者🐉") - ] + "ドラゴンクエストの冒険者🐉" + `shouldLexTo` [ (1, 1, Token "ドラゴンクエストの冒険者🐉") + ] ignoreNesting :: Expectation ignoreNesting = - ">/foo/" - `shouldLexTo` [ (1, 1, Token ">/foo/") - ] + ">/foo/" + `shouldLexTo` [ (1, 1, Token ">/foo/") + ] birdTracks :: Expectation birdTracks = - ">> code" - `shouldLexTo` [ (1, 1, BirdTrack) - , (1, 4, Token "code") - ] + ">> code" + `shouldLexTo` [ (1, 1, BirdTrack) + , (1, 4, Token "code") + ] quotes :: Expectation quotes = - "\"quoted\"" - `shouldLexTo` [ (1, 1, QuoteOpen) - , (1, 2, Token "quoted") - , (1, 8, QuoteClose) - ] + "\"quoted\"" + `shouldLexTo` [ (1, 1, QuoteOpen) + , (1, 2, Token "quoted") + , (1, 8, QuoteClose) + ] space :: Expectation space = do - "\n " - `shouldLexTo` [ (1, 1, Newline) - , (2, 1, Space) - ] - " \n" - `shouldLexTo` [ (1, 1, Space) - , (1, 2, Newline) - ] + "\n " + `shouldLexTo` [ (1, 1, Newline) + , (2, 1, Space) + ] + " \n" + `shouldLexTo` [ (1, 1, Space) + , (1, 2, Newline) + ] monospace :: Expectation monospace = - "@mono@" - `shouldLexTo` [ (1, 1, MonospaceOpen) - , (1, 2, Token "mono") - , (1, 6, MonospaceClose) - ] + "@mono@" + `shouldLexTo` [ (1, 1, MonospaceOpen) + , (1, 2, Token "mono") + , (1, 6, MonospaceClose) + ] emphatic :: Expectation emphatic = - "/emphatic/" - `shouldLexTo` [ (1, 1, EmphasisOpen) - , (1, 2, Token "emphatic") - , (1, 10, EmphasisClose) - ] + "/emphatic/" + `shouldLexTo` [ (1, 1, EmphasisOpen) + , (1, 2, Token "emphatic") + , (1, 10, EmphasisClose) + ] someString :: Expectation someString = - "some string" - `shouldLexTo` [ (1, 1, Token "some") - , (1, 5, Space) - , (1, 6, Token "string") - ] + "some string" + `shouldLexTo` [ (1, 1, Token "some") + , (1, 5, Space) + , (1, 6, Token "string") + ] -------------- -- Helpers @@ -183,16 +183,16 @@ someString = type Doc id = DocMarkup () id instance IsString (Doc String) where - fromString = DocString + fromString = DocString shouldLexTo :: String -> [(Int, Int, Token)] -> Expectation shouldLexTo input expected = - withFrozenCallStack $ + withFrozenCallStack $ case lexer input of - Right tokens -> do - let actual = map (\(pos, tok) -> (sourceLine pos, sourceColumn pos, tok)) tokens - actual `shouldBe` expected - Left err -> expectationFailure $ "Parse error: " <> show err + Right tokens -> do + let actual = map (\(pos, tok) -> (sourceLine pos, sourceColumn pos, tok)) tokens + actual `shouldBe` expected + Left err -> expectationFailure $ "Parse error: " <> show err shouldParseTo :: Text -> DocMarkup mod Identifier -> Expectation shouldParseTo input ast = parseText input `shouldBe` ast From 08dc87a3076c0deab4703db6d51064f4d6a2e6b4 Mon Sep 17 00:00:00 2001 From: Igor Ranieri Date: Sat, 27 Sep 2025 15:03:00 +0000 Subject: [PATCH 5/8] Add CI runner (#5) Reviewed-on: https://git.elland.me/elland/haddock2/pulls/5 Co-authored-by: Igor Ranieri Co-committed-by: Igor Ranieri --- .forgejo/workflows/test.yaml | 99 ++++++++++++++++++++++++++++++++++++ Dockerfile | 36 +++++++++++++ Makefile | 41 +++++++++++++-- src/Lexer.hs | 4 +- src/Parser/Util.hs | 4 +- src/Types.hs | 83 +++++++++++++++--------------- test/Spec.hs | 14 ++--- 7 files changed, 228 insertions(+), 53 deletions(-) create mode 100644 .forgejo/workflows/test.yaml create mode 100644 Dockerfile diff --git a/.forgejo/workflows/test.yaml b/.forgejo/workflows/test.yaml new file mode 100644 index 0000000..ee81782 --- /dev/null +++ b/.forgejo/workflows/test.yaml @@ -0,0 +1,99 @@ +name: Haskell CI +on: + pull_request: + branches: + - dev + - main + push: + branches: + - main + +jobs: + build: + runs-on: docker + container: + image: elland/haddock2:latest + steps: + - name: Checkout code + uses: actions/checkout@v4 + - name: Check versions + run: | + ghc --version + cabal --version + node --version + - name: Cache Cabal packages + uses: actions/cache@v4 + with: + path: | + ~/.cabal/packages + ~/.cabal/store + dist-newstyle + key: ${{ runner.os }}-haskell-9.10-cabal-${{ hashFiles('**/*.cabal', '**/cabal.project') }} + restore-keys: | + ${{ runner.os }}-haskell-9.10-cabal- + - name: Update Cabal package index + run: cabal update + - name: Configure project + run: cabal configure --enable-tests --enable-benchmarks + - name: Build dependencies + run: cabal build --only-dependencies --enable-tests --enable-benchmarks + - name: Build project + run: cabal build --enable-tests --enable-benchmarks + - name: Run documentation build + run: cabal haddock + + test: + runs-on: docker + container: + image: elland/haddock2:latest + needs: build + steps: + - name: Checkout code + uses: actions/checkout@v4 + - name: Cache Cabal packages + uses: actions/cache@v4 + with: + path: | + ~/.cabal/packages + ~/.cabal/store + dist-newstyle + key: ${{ runner.os }}-haskell-9.10-cabal-${{ hashFiles('**/*.cabal', '**/cabal.project') }} + restore-keys: | + ${{ runner.os }}-haskell-9.10-cabal- + - name: Update Cabal package index + run: cabal update + - name: Configure project + run: cabal configure --enable-tests --enable-benchmarks + - name: Build dependencies + run: cabal build --only-dependencies --enable-tests --enable-benchmarks + - name: Build project + run: cabal build --enable-tests --enable-benchmarks + - name: Run tests + run: cabal test --test-show-details=direct + + fourmolu: + runs-on: docker + container: + image: elland/haddock2:latest + needs: build + steps: + - name: Checkout code + uses: actions/checkout@v4 + - name: Run fourmolu + run: | + find src test app -name "*.hs" -exec fourmolu --check-idempotence {} \; 2>/dev/null || true + find src test app -name "*.hs" -exec fourmolu --mode check {} \; + + hlint: + runs-on: docker + container: + image: elland/haddock2:latest + needs: build + steps: + - name: Checkout code + uses: actions/checkout@v4 + - name: Run hlint + run: | + if [ -d src ]; then hlint src/; fi + if [ -d test ]; then hlint test/; fi + if [ -d app ]; then hlint app/; fi diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..c2919c5 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,36 @@ +FROM haskell:9.10.2-bullseye AS builder + +RUN apt-get update && apt-get install -y curl git && rm -rf /var/lib/apt/lists/* + +RUN curl -fsSL https://deb.nodesource.com/setup_22.x | bash - && \ + apt-get install -y nodejs && \ + rm -rf /var/lib/apt/lists/* + +RUN cabal update && \ + cabal install --install-method=copy --installdir=/usr/local/bin \ + fourmolu hlint cabal-gild + +WORKDIR /workspace + +FROM haskell:9.10.2-bullseye + +RUN apt-get update && apt-get install -y \ + libgmp10 \ + curl \ + && rm -rf /var/lib/apt/lists/* + +RUN curl -fsSL https://deb.nodesource.com/setup_22.x | bash - && \ + apt-get install -y nodejs && \ + rm -rf /var/lib/apt/lists/* + +RUN cabal update + +COPY --from=builder /usr/local/bin/cabal /usr/local/bin/ +COPY --from=builder /usr/local/bin/fourmolu /usr/local/bin/ +COPY --from=builder /usr/local/bin/hlint /usr/local/bin/ +COPY --from=builder /usr/local/bin/cabal-gild /usr/local/bin/ + +WORKDIR /workspace + +CMD [ "bash" ] + diff --git a/Makefile b/Makefile index 62cad36..6bc9ca2 100644 --- a/Makefile +++ b/Makefile @@ -1,9 +1,44 @@ .PHONY: help -help: ## Show this help. +help: ## Show this help @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' -.PHONY: format -format: +.PHONY: build +build: ## Build the project + cabal build + +.PHONY: test +test: ## Run tests + cabal test --test-show-details=direct + +.PHONY: clean +clean: ## Clean build artifacts + cabal clean + +.PHONY: fourmolu +fourmolu: ## Format Haskell code find . -type f -name "*.hs" ! -path "./dist-newstyle/*" -exec fourmolu -i {} + + +.PHONY: fourmolu-check +fourmolu-check: ## Check if code is formatted + find . -type f -name "*.hs" ! -path "./dist-newstyle/*" -exec fourmolu --mode check {} \; + +.PHONY: lint +lint: ## Run hlint + hlint src test app + +.PHONY: cabal-gild +cabal-gild: ## Format cabal file cabal-gild --io=haddock2.cabal +.PHONY: format +format: fourmolu cabal-gild ## Run all formatters + +.PHONY: check +check: fourmolu-check lint ## Run all checks (CI-style) + +.PHONY: ci +ci: build test check ## Run full CI pipeline locally + +.PHONY: docs +docs: ## Generate documentation + cabal haddock --haddock-hyperlink-source diff --git a/src/Lexer.hs b/src/Lexer.hs index 4a85fb5..84a8373 100644 --- a/src/Lexer.hs +++ b/src/Lexer.hs @@ -69,7 +69,7 @@ located :: Parser a -> Parser (SourcePos, a) located p = (,) <$> getPosition <*> p tokenise :: [Parser a] -> Parser [(SourcePos, a)] -tokenise = sequence . map located +tokenise = mapM located lexer :: String -> Either ParseError [LocatedToken] lexer = Parsec.runParser lexText initialParserState "input" . Text.pack @@ -146,7 +146,7 @@ delimitedNoTrailing openP closeP openTok = asList <$> delimitedAsTuple (openTok asList (a, tok, _) = [a, tok] delimitedSymmetric :: Parser a -> Token -> Token -> Parser [LocatedToken] -delimitedSymmetric s t1 t2 = delimited s s t1 t2 +delimitedSymmetric s = delimited s s eol :: Parser () eol = void "\n" <|> void "\r\n" <|> Parsec.eof diff --git a/src/Parser/Util.hs b/src/Parser/Util.hs index 4cf96cb..a75fcef 100644 --- a/src/Parser/Util.hs +++ b/src/Parser/Util.hs @@ -13,7 +13,9 @@ import Text.Parsec.Pos (updatePosChar) Return everything consumed except for the end pattern itself. -} takeUntil :: Text -> Parser Text -takeUntil end_ = Text.dropEnd (Text.length end_) <$> requireEnd (scan p (False, end)) >>= gotSome +takeUntil end_ = + requireEnd (scan p (False, end)) + >>= gotSome . Text.dropEnd (Text.length end_) where end = Text.unpack end_ diff --git a/src/Types.hs b/src/Types.hs index a41e38b..ec7a4e4 100644 --- a/src/Types.hs +++ b/src/Types.hs @@ -9,6 +9,8 @@ module Types ( ) where +import Data.Foldable (fold) + newtype Document = Document { meta :: Meta } @@ -28,6 +30,7 @@ data Since = Since -- Could have a better type? type Version = [Int] + type Package = String data DocMarkup mod id @@ -51,78 +54,78 @@ data DocMarkup mod id | -- | Bold __bold text__ DocBold (DocMarkup mod id) | {- | Unordered lists - * this - or - - this + * this + or + - this -} DocUnorderedList [DocMarkup mod id] | {- | Ordered lists - 1. this - or - (1) this + 1. this + or + (1) this -} DocOrderedList [(Int, DocMarkup mod id)] | {- | Definition lists - [term] a term - [another term] another definition + [term] a term + [another term] another definition -} DocDefinitionList [(DocMarkup mod id, DocMarkup mod id)] | {- | Code blocks - @ - a code block in here - with multiple lines - @ + @ + a code block in here + with multiple lines + @ - Or with bird tracks: - > some code - > goes here + Or with bird tracks: + > some code + > goes here -} DocCodeBlock (DocMarkup mod id) | {- | Hyperlinks - __marked__: - - - __Auto-detected URLs__: - http://example.com - https://example.com - ftp://example.com - __Markdown style__ - [link text](http://example.com) - [link text]("Module.Name") + __marked__: + + + __Auto-detected URLs__: + http://example.com + https://example.com + ftp://example.com + __Markdown style__ + [link text](http://example.com) + [link text]("Module.Name") -} DocHyperlink (Hyperlink (DocMarkup mod id)) | {- | Pictures - <> - <> + <> + <> - __Markdown Images__ + __Markdown Images__ - ![alt text](image.png) + ![alt text](image.png) -} DocPicture Picture | {- | Inline math expressions - \(mathematical expression\) + \(mathematical expression\) -} DocMathInline String | {- | Math multiline display - \[ - mathematical expression - in multiple lines - \] + \[ + mathematical expression + in multiple lines + \] -} DocMathDisplay String | {- | Anchors, no spaces allowed - #anchor-name# + #anchor-name# -} DocAnchor String | {- | Property descriptions - prop> property description + prop> property description -} DocProperty String | {- | Examples - >>> expression - result line 1 - result line 2 + >>> expression + result line 1 + result line 2 -} DocExamples [Example] | -- | Header @@ -136,7 +139,7 @@ instance Semigroup (DocMarkup mod id) where instance Monoid (DocMarkup mod id) where mempty = DocEmpty - mconcat = foldr (<>) mempty + mconcat = fold data ModuleLink id = ModuleLink { name :: String diff --git a/test/Spec.hs b/test/Spec.hs index 7258a2d..683c56d 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -1,16 +1,15 @@ {-# LANGUAGE OverloadedStrings #-} {-# OPTIONS_GHC -Wno-orphans #-} -import Test.Hspec - +import Data.String (IsString (..)) +import Data.Text (Text) +import GHC.Stack import Identifier (Identifier) import Lexer import Parser import Types -import Data.String (IsString (..)) -import Data.Text (Text) -import GHC.Stack +import Test.Hspec import Text.Parsec.Pos main :: IO () @@ -34,9 +33,9 @@ main = hspec $ do describe "Parser" do it "Bold" do - "__bold__" `shouldParseTo` (DocBold (DocString "bold")) + "__bold__" `shouldParseTo` DocBold (DocString "bold") it "Emphasis" do - "/emphasis/" `shouldParseTo` (DocEmphasis (DocString "emphasis")) + "/emphasis/" `shouldParseTo` DocEmphasis (DocString "emphasis") ------------ -- Tests @@ -57,6 +56,7 @@ modules = do `shouldLexTo` [ (1, 2, Module "OtherModule.Name") , (1, 18, Anchor "myAnchor") ] + link :: Expectation link = "[link to](http://some.website)" From 82eb8435ab65983a77070a65a82ca4c147ba3939 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9ana=20=E6=B1=9F?= Date: Sat, 27 Sep 2025 15:10:11 +0000 Subject: [PATCH 6/8] feat(lexer): implement numericEntity lexer (#6) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I did this yesterday actually, I just rebased it. Reviewed-on: https://git.elland.me/elland/haddock2/pulls/6 Reviewed-by: elland Co-authored-by: Léana 江 Co-committed-by: Léana 江 --- src/Lexer.hs | 28 +++++++++++++++++++++++++++- test/Spec.hs | 13 +++++++++++++ 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/src/Lexer.hs b/src/Lexer.hs index 84a8373..77fc84a 100644 --- a/src/Lexer.hs +++ b/src/Lexer.hs @@ -8,10 +8,11 @@ module Lexer ( where import Control.Monad (mfilter, void) +import Data.Char (ord, toLower) import Data.Functor (($>)) import Data.Text (Text, intercalate) import Data.Text qualified as Text -import GHC.Unicode (isAlphaNum, isControl, isPrint, isSpace, isUpper) +import GHC.Unicode (isAlphaNum, isControl, isDigit, isPrint, isSpace, isUpper) import ParserMonad (Parser, initialParserState) import Text.Parsec import Text.Parsec qualified as Parsec @@ -94,6 +95,7 @@ lexText = go , labeledLink , module_ , anchor + , numericEntity , textElement , quotes , birdTrack @@ -255,6 +257,30 @@ bold = delimitedSymmetric "__" BoldOpen BoldClose monospace :: Lexer monospace = delimitedSymmetric "@" MonospaceOpen MonospaceClose +decimal :: Parser Int +decimal = read . Text.unpack <$> takeWhile1_ isDigit + +hexadecimal :: Parser Int +hexadecimal = "x" *> (convert 0 . fmap (normalise . toLower) <$> many1 hexDigit) + where + normalise :: Char -> Int + normalise c + | ord '0' <= n && n <= ord '9' = n - ord '0' + | ord 'A' <= n && n <= ord 'F' = n - ord 'A' + 10 + | ord 'a' <= n && n <= ord 'f' = n - ord 'a' + 10 + | otherwise = error "unexpected: invalid hex number" + where + n = ord c + + convert :: Int -> [Int] -> Int + convert acc [] = acc + convert acc (x : xs) = convert (acc * 16 + x) xs + +numericEntity :: Lexer +numericEntity = do + x <- located $ between "&#" ";" (NumericEntity <$> (hexadecimal <|> decimal)) + pure [x] + other :: Lexer other = do pos <- getPosition diff --git a/test/Spec.hs b/test/Spec.hs index 683c56d..2040e2f 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -29,6 +29,7 @@ main = hspec $ do it "bird tracks" birdTracks it "module names" modules it "quotes" quotes + it "numeric entity" numericEntity it "ignores nesting" ignoreNesting describe "Parser" do @@ -152,6 +153,18 @@ space = do , (1, 2, Newline) ] +numericEntity :: Expectation +numericEntity = do + "A λ" + `shouldLexTo` [ (1, 1, NumericEntity 65) + , (1, 6, Space) + , (1, 7, NumericEntity 955) -- lambda + ] + -- Hex + "e" + `shouldLexTo` [ (1, 1, NumericEntity 101) + ] + monospace :: Expectation monospace = "@mono@" From ebda9e1d12d35db866f7af13677e7d9d7396b0bb Mon Sep 17 00:00:00 2001 From: Igor Ranieri Date: Sun, 5 Oct 2025 11:06:33 +0000 Subject: [PATCH 7/8] nixify + npins (#9) Reviewed-on: https://git.elland.me/elland/haddock2/pulls/9 Co-authored-by: Igor Ranieri Co-committed-by: Igor Ranieri --- .envrc | 1 + .forgejo/workflows/ci.yml | 2 + npins/default.nix | 146 ++++++++++++++++++++++++++++++++++++++ npins/sources.json | 11 +++ shell.nix | 24 +++++++ 5 files changed, 184 insertions(+) create mode 100644 .envrc create mode 100644 .forgejo/workflows/ci.yml create mode 100644 npins/default.nix create mode 100644 npins/sources.json create mode 100644 shell.nix diff --git a/.envrc b/.envrc new file mode 100644 index 0000000..1d953f4 --- /dev/null +++ b/.envrc @@ -0,0 +1 @@ +use nix diff --git a/.forgejo/workflows/ci.yml b/.forgejo/workflows/ci.yml new file mode 100644 index 0000000..3f7362e --- /dev/null +++ b/.forgejo/workflows/ci.yml @@ -0,0 +1,2 @@ +runs-on: self-hosted + diff --git a/npins/default.nix b/npins/default.nix new file mode 100644 index 0000000..6592476 --- /dev/null +++ b/npins/default.nix @@ -0,0 +1,146 @@ +/* + This file is provided under the MIT licence: + + Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ +# Generated by npins. Do not modify; will be overwritten regularly +let + data = builtins.fromJSON (builtins.readFile ./sources.json); + version = data.version; + + # https://github.com/NixOS/nixpkgs/blob/0258808f5744ca980b9a1f24fe0b1e6f0fecee9c/lib/lists.nix#L295 + range = + first: last: if first > last then [ ] else builtins.genList (n: first + n) (last - first + 1); + + # https://github.com/NixOS/nixpkgs/blob/0258808f5744ca980b9a1f24fe0b1e6f0fecee9c/lib/strings.nix#L257 + stringToCharacters = s: map (p: builtins.substring p 1 s) (range 0 (builtins.stringLength s - 1)); + + # https://github.com/NixOS/nixpkgs/blob/0258808f5744ca980b9a1f24fe0b1e6f0fecee9c/lib/strings.nix#L269 + stringAsChars = f: s: concatStrings (map f (stringToCharacters s)); + concatMapStrings = f: list: concatStrings (map f list); + concatStrings = builtins.concatStringsSep ""; + + # If the environment variable NPINS_OVERRIDE_${name} is set, then use + # the path directly as opposed to the fetched source. + # (Taken from Niv for compatibility) + mayOverride = + name: path: + let + envVarName = "NPINS_OVERRIDE_${saneName}"; + saneName = stringAsChars (c: if (builtins.match "[a-zA-Z0-9]" c) == null then "_" else c) name; + ersatz = builtins.getEnv envVarName; + in + if ersatz == "" then + path + else + # this turns the string into an actual Nix path (for both absolute and + # relative paths) + builtins.trace "Overriding path of \"${name}\" with \"${ersatz}\" due to set \"${envVarName}\"" ( + if builtins.substring 0 1 ersatz == "/" then + /. + ersatz + else + /. + builtins.getEnv "PWD" + "/${ersatz}" + ); + + mkSource = + name: spec: + assert spec ? type; + let + path = + if spec.type == "Git" then + mkGitSource spec + else if spec.type == "GitRelease" then + mkGitSource spec + else if spec.type == "PyPi" then + mkPyPiSource spec + else if spec.type == "Channel" then + mkChannelSource spec + else if spec.type == "Tarball" then + mkTarballSource spec + else + builtins.throw "Unknown source type ${spec.type}"; + in + spec // { outPath = mayOverride name path; }; + + mkGitSource = + { + repository, + revision, + url ? null, + submodules, + hash, + branch ? null, + ... + }: + assert repository ? type; + # At the moment, either it is a plain git repository (which has an url), or it is a GitHub/GitLab repository + # In the latter case, there we will always be an url to the tarball + if url != null && !submodules then + builtins.fetchTarball { + inherit url; + sha256 = hash; # FIXME: check nix version & use SRI hashes + } + else + let + url = + if repository.type == "Git" then + repository.url + else if repository.type == "GitHub" then + "https://github.com/${repository.owner}/${repository.repo}.git" + else if repository.type == "GitLab" then + "${repository.server}/${repository.repo_path}.git" + else + throw "Unrecognized repository type ${repository.type}"; + urlToName = + url: rev: + let + matched = builtins.match "^.*/([^/]*)(\\.git)?$" url; + + short = builtins.substring 0 7 rev; + + appendShort = if (builtins.match "[a-f0-9]*" rev) != null then "-${short}" else ""; + in + "${if matched == null then "source" else builtins.head matched}${appendShort}"; + name = urlToName url revision; + in + builtins.fetchGit { + rev = revision; + inherit name; + # hash = hash; + inherit url submodules; + }; + + mkPyPiSource = + { url, hash, ... }: + builtins.fetchurl { + inherit url; + sha256 = hash; + }; + + mkChannelSource = + { url, hash, ... }: + builtins.fetchTarball { + inherit url; + sha256 = hash; + }; + + mkTarballSource = + { + url, + locked_url ? url, + hash, + ... + }: + builtins.fetchTarball { + url = locked_url; + sha256 = hash; + }; +in +if version == 5 then + builtins.mapAttrs mkSource data.pins +else + throw "Unsupported format version ${toString version} in sources.json. Try running `npins upgrade`" diff --git a/npins/sources.json b/npins/sources.json new file mode 100644 index 0000000..5317047 --- /dev/null +++ b/npins/sources.json @@ -0,0 +1,11 @@ +{ + "pins": { + "nixpkgs": { + "type": "Channel", + "name": "nixpkgs-unstable", + "url": "https://releases.nixos.org/nixpkgs/nixpkgs-25.11pre868532.647e5c14cbd5/nixexprs.tar.xz", + "hash": "0i6mgl7pm7y4ydrrll7szmv8hhxb3cyny8x1g1a8sp3g5wl3yd9g" + } + }, + "version": 5 +} diff --git a/shell.nix b/shell.nix new file mode 100644 index 0000000..d67a849 --- /dev/null +++ b/shell.nix @@ -0,0 +1,24 @@ +let + sources = import ./npins; +in +{ + pkgs ? import sources.nixpkgs { }, +}: + +pkgs.mkShell rec { + name = "haddock2"; + + packages = + with pkgs; + [ + haskell.packages.ghc912.ghc + zlib + ] + ++ map haskell.lib.justStaticExecutables [ + haskellPackages.cabal-gild + haskellPackages.fourmolu + cabal-install + ]; + + env.LD_LIBRARY_PATH = pkgs.lib.makeLibraryPath packages; +} From 1664694134314e33b8e90fe874e0d89222909baf Mon Sep 17 00:00:00 2001 From: Igor Ranieri Date: Sun, 5 Oct 2025 13:18:49 +0000 Subject: [PATCH 8/8] chore: add hls (#10) Reviewed-on: https://git.elland.me/elland/haddock2/pulls/10 Co-authored-by: Igor Ranieri Co-committed-by: Igor Ranieri --- shell.nix | 1 + 1 file changed, 1 insertion(+) diff --git a/shell.nix b/shell.nix index d67a849..61a931d 100644 --- a/shell.nix +++ b/shell.nix @@ -12,6 +12,7 @@ pkgs.mkShell rec { with pkgs; [ haskell.packages.ghc912.ghc + haskell.packages.ghc912.haskell-language-server zlib ] ++ map haskell.lib.justStaticExecutables [