From 449b7c8ca73018b738a81069f219af38b8270b87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9ana=20=E6=B1=9F?= Date: Sun, 28 Sep 2025 12:35:56 +0800 Subject: [PATCH 1/5] ref(lexer): attempt to not try on every token ...for a better error message and better perf --- src/Lexer.hs | 54 ++++++++++++++++++++++++++++++++++------------------ 1 file changed, 35 insertions(+), 19 deletions(-) diff --git a/src/Lexer.hs b/src/Lexer.hs index 77fc84a..84e38cc 100644 --- a/src/Lexer.hs +++ b/src/Lexer.hs @@ -82,28 +82,43 @@ lexText = go Parsec.optionMaybe Parsec.eof >>= \case Just _ -> pure [] Nothing -> do - toks <- - choice $ - Parsec.try - <$> [ mathMultiline - , mathInline - , escape -- maths go before escape to avoid mismatch - , headers - , newlineToken - , spaceToken - , link - , labeledLink - , module_ - , anchor - , numericEntity - , textElement - , quotes - , birdTrack - , other - ] + toks <- topLevel rest <- go pure (toks <> rest) +{- FOURMOLU_DISABLE -} + topLevel = + -- backtracking here so we always have a chance to try "other", the "catch-all-leave-to-parser-to-deal-with" choice + -- TODO: is this desirable? do we throw lexer error at all? + try + ( choice + -- Sorted in + -- - longest to shortest parse path + -- - highest frequency to lowest frequency (for performance?) + -- - more exact to more freeform (the latter can be the former but not vice versa) + [ spaceToken + , newlineToken + + , try module_ + , quotes + , birdTrack + + -- starts with "\" + , try mathMultiline + , try mathInline + , escape + + , headers + , labeledLink + , link + , anchor + , numericEntity + , textElement + ] + ) + <|> other +{- FOURMOLU_ENABLE -} + -- Tokens textElement :: Parser [LocatedToken] @@ -239,6 +254,7 @@ mathMultiline = delimited "\\[" "\\]" MathMultilineOpen MathMultilineClose mathInline :: Lexer mathInline = delimited "\\(" "\\)" MathInlineOpen MathInlineClose +-- TODO: make sure this starts at column 0? birdTrack :: Lexer birdTrack = delimitedNoTrailing ">> " eol BirdTrack From f9423d4af0e6b4a8f0f2ae4418d03f91e9994ed1 Mon Sep 17 00:00:00 2001 From: Igor Ranieri Date: Sun, 28 Sep 2025 21:55:02 +0200 Subject: [PATCH 2/5] Added expression eval; adjusted birdtrack, added sol combinator. --- src/Lexer.hs | 22 ++++++++++++++++++++-- test/Spec.hs | 38 ++++++++++++++++++++++++++++++++------ 2 files changed, 52 insertions(+), 8 deletions(-) diff --git a/src/Lexer.hs b/src/Lexer.hs index 84e38cc..3b04ecc 100644 --- a/src/Lexer.hs +++ b/src/Lexer.hs @@ -42,6 +42,7 @@ data Token | Escape | EmphasisOpen | EmphasisClose + | Expression | Header Level | MonospaceOpen | MonospaceClose @@ -101,6 +102,7 @@ lexText = go , try module_ , quotes + , try expression , birdTrack -- starts with "\" @@ -158,16 +160,26 @@ delimited openP closeP openTok closeTok = asList <$> delimitedAsTuple (openTok < asList (a, tok, b) = [a, tok, b] delimitedNoTrailing :: Parser open -> Parser close -> Token -> Parser [LocatedToken] -delimitedNoTrailing openP closeP openTok = asList <$> delimitedAsTuple (openTok <$ openP) (void closeP) +delimitedNoTrailing openP closeP openTok = + asList <$> delimitedAsTuple (openTok <$ openP) (void closeP) where asList (a, tok, _) = [a, tok] delimitedSymmetric :: Parser a -> Token -> Token -> Parser [LocatedToken] delimitedSymmetric s = delimited s s +--- End of line // end of file eol :: Parser () eol = void "\n" <|> void "\r\n" <|> Parsec.eof +-- Start of line // start of file +sol :: Parser () +sol = do + p <- getPosition + if sourceColumn p == 1 + then pure () + else fail "Not at start of line/document" + header1 :: Lexer header1 = delimitedNoTrailing "= " eol (Header One) @@ -256,7 +268,13 @@ mathInline = delimited "\\(" "\\)" MathInlineOpen MathInlineClose -- TODO: make sure this starts at column 0? birdTrack :: Lexer -birdTrack = delimitedNoTrailing ">> " eol BirdTrack +birdTrack = delimitedNoTrailing (sol <* "> ") eol BirdTrack + +-- TODO: also match following lines iff: +-- they start with alphanum +-- they're not empty +expression :: Lexer +expression = delimitedNoTrailing (sol <* ">>> ") eol Expression escape :: Lexer escape = delimitedNoTrailing "\\" eol Escape diff --git a/test/Spec.hs b/test/Spec.hs index 2040e2f..3db44f1 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -18,7 +18,7 @@ main = hspec $ do describe "minimal" do it "handles unicode" unicode it "escapes" escaping - it "maths" math + it "maths" maths it "anchors" anchor it "space chars" space it "bare string" someString @@ -29,6 +29,7 @@ main = hspec $ do it "bird tracks" birdTracks it "module names" modules it "quotes" quotes + it "expressions" expressions it "numeric entity" numericEntity it "ignores nesting" ignoreNesting @@ -91,8 +92,8 @@ anchor = `shouldLexTo` [ (1, 1, Anchor "myAnchor") ] -math :: IO () -math = do +maths :: IO () +maths = do "\\[some math\\]" `shouldLexTo` [ (1, 1, MathMultilineOpen) , (1, 3, Token "some math") @@ -128,10 +129,35 @@ ignoreNesting = ] birdTracks :: Expectation -birdTracks = - ">> code" +birdTracks = do + "> code line" `shouldLexTo` [ (1, 1, BirdTrack) - , (1, 4, Token "code") + , (1, 3, Token "code line") + ] + " > not code" + `shouldLexTo` [ (1, 1, Space) + , (1, 2, Token ">") + , (1, 3, Space) + , (1, 4, Token "not") + , (1, 7, Space) + , (1, 8, Token "code") + ] + +expressions :: Expectation +expressions = do + ">>> eval this" + `shouldLexTo` [ (1, 1, Expression) + , (1, 5, Token "eval this") + ] + " >>> not eval this" + `shouldLexTo` [ (1, 1, Space) + , (1, 2, Token ">>>") + , (1, 5, Space) + , (1, 6, Token "not") + , (1, 9, Space) + , (1, 10, Token "eval") + , (1, 14, Space) + , (1, 15, Token "this") ] quotes :: Expectation From ebda9e1d12d35db866f7af13677e7d9d7396b0bb Mon Sep 17 00:00:00 2001 From: Igor Ranieri Date: Sun, 5 Oct 2025 11:06:33 +0000 Subject: [PATCH 3/5] nixify + npins (#9) Reviewed-on: https://git.elland.me/elland/haddock2/pulls/9 Co-authored-by: Igor Ranieri Co-committed-by: Igor Ranieri --- .envrc | 1 + .forgejo/workflows/ci.yml | 2 + npins/default.nix | 146 ++++++++++++++++++++++++++++++++++++++ npins/sources.json | 11 +++ shell.nix | 24 +++++++ 5 files changed, 184 insertions(+) create mode 100644 .envrc create mode 100644 .forgejo/workflows/ci.yml create mode 100644 npins/default.nix create mode 100644 npins/sources.json create mode 100644 shell.nix diff --git a/.envrc b/.envrc new file mode 100644 index 0000000..1d953f4 --- /dev/null +++ b/.envrc @@ -0,0 +1 @@ +use nix diff --git a/.forgejo/workflows/ci.yml b/.forgejo/workflows/ci.yml new file mode 100644 index 0000000..3f7362e --- /dev/null +++ b/.forgejo/workflows/ci.yml @@ -0,0 +1,2 @@ +runs-on: self-hosted + diff --git a/npins/default.nix b/npins/default.nix new file mode 100644 index 0000000..6592476 --- /dev/null +++ b/npins/default.nix @@ -0,0 +1,146 @@ +/* + This file is provided under the MIT licence: + + Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ +# Generated by npins. Do not modify; will be overwritten regularly +let + data = builtins.fromJSON (builtins.readFile ./sources.json); + version = data.version; + + # https://github.com/NixOS/nixpkgs/blob/0258808f5744ca980b9a1f24fe0b1e6f0fecee9c/lib/lists.nix#L295 + range = + first: last: if first > last then [ ] else builtins.genList (n: first + n) (last - first + 1); + + # https://github.com/NixOS/nixpkgs/blob/0258808f5744ca980b9a1f24fe0b1e6f0fecee9c/lib/strings.nix#L257 + stringToCharacters = s: map (p: builtins.substring p 1 s) (range 0 (builtins.stringLength s - 1)); + + # https://github.com/NixOS/nixpkgs/blob/0258808f5744ca980b9a1f24fe0b1e6f0fecee9c/lib/strings.nix#L269 + stringAsChars = f: s: concatStrings (map f (stringToCharacters s)); + concatMapStrings = f: list: concatStrings (map f list); + concatStrings = builtins.concatStringsSep ""; + + # If the environment variable NPINS_OVERRIDE_${name} is set, then use + # the path directly as opposed to the fetched source. + # (Taken from Niv for compatibility) + mayOverride = + name: path: + let + envVarName = "NPINS_OVERRIDE_${saneName}"; + saneName = stringAsChars (c: if (builtins.match "[a-zA-Z0-9]" c) == null then "_" else c) name; + ersatz = builtins.getEnv envVarName; + in + if ersatz == "" then + path + else + # this turns the string into an actual Nix path (for both absolute and + # relative paths) + builtins.trace "Overriding path of \"${name}\" with \"${ersatz}\" due to set \"${envVarName}\"" ( + if builtins.substring 0 1 ersatz == "/" then + /. + ersatz + else + /. + builtins.getEnv "PWD" + "/${ersatz}" + ); + + mkSource = + name: spec: + assert spec ? type; + let + path = + if spec.type == "Git" then + mkGitSource spec + else if spec.type == "GitRelease" then + mkGitSource spec + else if spec.type == "PyPi" then + mkPyPiSource spec + else if spec.type == "Channel" then + mkChannelSource spec + else if spec.type == "Tarball" then + mkTarballSource spec + else + builtins.throw "Unknown source type ${spec.type}"; + in + spec // { outPath = mayOverride name path; }; + + mkGitSource = + { + repository, + revision, + url ? null, + submodules, + hash, + branch ? null, + ... + }: + assert repository ? type; + # At the moment, either it is a plain git repository (which has an url), or it is a GitHub/GitLab repository + # In the latter case, there we will always be an url to the tarball + if url != null && !submodules then + builtins.fetchTarball { + inherit url; + sha256 = hash; # FIXME: check nix version & use SRI hashes + } + else + let + url = + if repository.type == "Git" then + repository.url + else if repository.type == "GitHub" then + "https://github.com/${repository.owner}/${repository.repo}.git" + else if repository.type == "GitLab" then + "${repository.server}/${repository.repo_path}.git" + else + throw "Unrecognized repository type ${repository.type}"; + urlToName = + url: rev: + let + matched = builtins.match "^.*/([^/]*)(\\.git)?$" url; + + short = builtins.substring 0 7 rev; + + appendShort = if (builtins.match "[a-f0-9]*" rev) != null then "-${short}" else ""; + in + "${if matched == null then "source" else builtins.head matched}${appendShort}"; + name = urlToName url revision; + in + builtins.fetchGit { + rev = revision; + inherit name; + # hash = hash; + inherit url submodules; + }; + + mkPyPiSource = + { url, hash, ... }: + builtins.fetchurl { + inherit url; + sha256 = hash; + }; + + mkChannelSource = + { url, hash, ... }: + builtins.fetchTarball { + inherit url; + sha256 = hash; + }; + + mkTarballSource = + { + url, + locked_url ? url, + hash, + ... + }: + builtins.fetchTarball { + url = locked_url; + sha256 = hash; + }; +in +if version == 5 then + builtins.mapAttrs mkSource data.pins +else + throw "Unsupported format version ${toString version} in sources.json. Try running `npins upgrade`" diff --git a/npins/sources.json b/npins/sources.json new file mode 100644 index 0000000..5317047 --- /dev/null +++ b/npins/sources.json @@ -0,0 +1,11 @@ +{ + "pins": { + "nixpkgs": { + "type": "Channel", + "name": "nixpkgs-unstable", + "url": "https://releases.nixos.org/nixpkgs/nixpkgs-25.11pre868532.647e5c14cbd5/nixexprs.tar.xz", + "hash": "0i6mgl7pm7y4ydrrll7szmv8hhxb3cyny8x1g1a8sp3g5wl3yd9g" + } + }, + "version": 5 +} diff --git a/shell.nix b/shell.nix new file mode 100644 index 0000000..d67a849 --- /dev/null +++ b/shell.nix @@ -0,0 +1,24 @@ +let + sources = import ./npins; +in +{ + pkgs ? import sources.nixpkgs { }, +}: + +pkgs.mkShell rec { + name = "haddock2"; + + packages = + with pkgs; + [ + haskell.packages.ghc912.ghc + zlib + ] + ++ map haskell.lib.justStaticExecutables [ + haskellPackages.cabal-gild + haskellPackages.fourmolu + cabal-install + ]; + + env.LD_LIBRARY_PATH = pkgs.lib.makeLibraryPath packages; +} From a9bccf4f07538577afbec05b3bca09418e2242de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A9ana=20=E6=B1=9F?= Date: Sun, 28 Sep 2025 12:35:56 +0800 Subject: [PATCH 4/5] ref(lexer): attempt to not try on every token ...for a better error message and better perf --- src/Lexer.hs | 54 ++++++++++++++++++++++++++++++++++------------------ 1 file changed, 35 insertions(+), 19 deletions(-) diff --git a/src/Lexer.hs b/src/Lexer.hs index 77fc84a..84e38cc 100644 --- a/src/Lexer.hs +++ b/src/Lexer.hs @@ -82,28 +82,43 @@ lexText = go Parsec.optionMaybe Parsec.eof >>= \case Just _ -> pure [] Nothing -> do - toks <- - choice $ - Parsec.try - <$> [ mathMultiline - , mathInline - , escape -- maths go before escape to avoid mismatch - , headers - , newlineToken - , spaceToken - , link - , labeledLink - , module_ - , anchor - , numericEntity - , textElement - , quotes - , birdTrack - , other - ] + toks <- topLevel rest <- go pure (toks <> rest) +{- FOURMOLU_DISABLE -} + topLevel = + -- backtracking here so we always have a chance to try "other", the "catch-all-leave-to-parser-to-deal-with" choice + -- TODO: is this desirable? do we throw lexer error at all? + try + ( choice + -- Sorted in + -- - longest to shortest parse path + -- - highest frequency to lowest frequency (for performance?) + -- - more exact to more freeform (the latter can be the former but not vice versa) + [ spaceToken + , newlineToken + + , try module_ + , quotes + , birdTrack + + -- starts with "\" + , try mathMultiline + , try mathInline + , escape + + , headers + , labeledLink + , link + , anchor + , numericEntity + , textElement + ] + ) + <|> other +{- FOURMOLU_ENABLE -} + -- Tokens textElement :: Parser [LocatedToken] @@ -239,6 +254,7 @@ mathMultiline = delimited "\\[" "\\]" MathMultilineOpen MathMultilineClose mathInline :: Lexer mathInline = delimited "\\(" "\\)" MathInlineOpen MathInlineClose +-- TODO: make sure this starts at column 0? birdTrack :: Lexer birdTrack = delimitedNoTrailing ">> " eol BirdTrack From 8c666f637c4670b9c76c1e5d1c144022d2b0d305 Mon Sep 17 00:00:00 2001 From: Igor Ranieri Date: Sun, 28 Sep 2025 21:55:02 +0200 Subject: [PATCH 5/5] Added expression eval; adjusted birdtrack, added sol combinator. --- src/Lexer.hs | 22 ++++++++++++++++++++-- test/Spec.hs | 38 ++++++++++++++++++++++++++++++++------ 2 files changed, 52 insertions(+), 8 deletions(-) diff --git a/src/Lexer.hs b/src/Lexer.hs index 84e38cc..3b04ecc 100644 --- a/src/Lexer.hs +++ b/src/Lexer.hs @@ -42,6 +42,7 @@ data Token | Escape | EmphasisOpen | EmphasisClose + | Expression | Header Level | MonospaceOpen | MonospaceClose @@ -101,6 +102,7 @@ lexText = go , try module_ , quotes + , try expression , birdTrack -- starts with "\" @@ -158,16 +160,26 @@ delimited openP closeP openTok closeTok = asList <$> delimitedAsTuple (openTok < asList (a, tok, b) = [a, tok, b] delimitedNoTrailing :: Parser open -> Parser close -> Token -> Parser [LocatedToken] -delimitedNoTrailing openP closeP openTok = asList <$> delimitedAsTuple (openTok <$ openP) (void closeP) +delimitedNoTrailing openP closeP openTok = + asList <$> delimitedAsTuple (openTok <$ openP) (void closeP) where asList (a, tok, _) = [a, tok] delimitedSymmetric :: Parser a -> Token -> Token -> Parser [LocatedToken] delimitedSymmetric s = delimited s s +--- End of line // end of file eol :: Parser () eol = void "\n" <|> void "\r\n" <|> Parsec.eof +-- Start of line // start of file +sol :: Parser () +sol = do + p <- getPosition + if sourceColumn p == 1 + then pure () + else fail "Not at start of line/document" + header1 :: Lexer header1 = delimitedNoTrailing "= " eol (Header One) @@ -256,7 +268,13 @@ mathInline = delimited "\\(" "\\)" MathInlineOpen MathInlineClose -- TODO: make sure this starts at column 0? birdTrack :: Lexer -birdTrack = delimitedNoTrailing ">> " eol BirdTrack +birdTrack = delimitedNoTrailing (sol <* "> ") eol BirdTrack + +-- TODO: also match following lines iff: +-- they start with alphanum +-- they're not empty +expression :: Lexer +expression = delimitedNoTrailing (sol <* ">>> ") eol Expression escape :: Lexer escape = delimitedNoTrailing "\\" eol Escape diff --git a/test/Spec.hs b/test/Spec.hs index 2040e2f..3db44f1 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -18,7 +18,7 @@ main = hspec $ do describe "minimal" do it "handles unicode" unicode it "escapes" escaping - it "maths" math + it "maths" maths it "anchors" anchor it "space chars" space it "bare string" someString @@ -29,6 +29,7 @@ main = hspec $ do it "bird tracks" birdTracks it "module names" modules it "quotes" quotes + it "expressions" expressions it "numeric entity" numericEntity it "ignores nesting" ignoreNesting @@ -91,8 +92,8 @@ anchor = `shouldLexTo` [ (1, 1, Anchor "myAnchor") ] -math :: IO () -math = do +maths :: IO () +maths = do "\\[some math\\]" `shouldLexTo` [ (1, 1, MathMultilineOpen) , (1, 3, Token "some math") @@ -128,10 +129,35 @@ ignoreNesting = ] birdTracks :: Expectation -birdTracks = - ">> code" +birdTracks = do + "> code line" `shouldLexTo` [ (1, 1, BirdTrack) - , (1, 4, Token "code") + , (1, 3, Token "code line") + ] + " > not code" + `shouldLexTo` [ (1, 1, Space) + , (1, 2, Token ">") + , (1, 3, Space) + , (1, 4, Token "not") + , (1, 7, Space) + , (1, 8, Token "code") + ] + +expressions :: Expectation +expressions = do + ">>> eval this" + `shouldLexTo` [ (1, 1, Expression) + , (1, 5, Token "eval this") + ] + " >>> not eval this" + `shouldLexTo` [ (1, 1, Space) + , (1, 2, Token ">>>") + , (1, 5, Space) + , (1, 6, Token "not") + , (1, 9, Space) + , (1, 10, Token "eval") + , (1, 14, Space) + , (1, 15, Token "this") ] quotes :: Expectation