chore(lexer): clean up

ref(lexer): simplify labeledLink
style(lexer): make binding naming consistent
2025-09-24 22:32:52 +08:00 · 2025-09-24 22:31:59 +08:00 · 2025-09-24 22:25:45 +08:00 · 2025-09-24 22:20:32 +08:00 · 2025-09-24 21:35:21 +08:00 · 2025-09-24 21:31:26 +08:00
15 changed files with 322 additions and 806 deletions
--- a/.envrc
+++ b/.envrc
@ -1 +0,0 @@
 use nix
--- a/.forgejo/workflows/ci.yml
+++ b/.forgejo/workflows/ci.yml
@ -1,2 +0,0 @@
 runs-on: self-hosted
--- a/.forgejo/workflows/test.yaml
+++ b/.forgejo/workflows/test.yaml
@ -1,99 +0,0 @@
 name: Haskell CI
 on:
  pull_request:
    branches:
      - dev
      - main
  push:
    branches:
      - main
 jobs:
  build:
    runs-on: docker
    container:
      image: elland/haddock2:latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Check versions
        run: |
          ghc --version
          cabal --version
          node --version
      - name: Cache Cabal packages
        uses: actions/cache@v4
        with:
          path: |
            ~/.cabal/packages
            ~/.cabal/store
            dist-newstyle
          key: ${{ runner.os }}-haskell-9.10-cabal-${{ hashFiles('**/*.cabal', '**/cabal.project') }}
          restore-keys: |
            ${{ runner.os }}-haskell-9.10-cabal-
      - name: Update Cabal package index
        run: cabal update
      - name: Configure project
        run: cabal configure --enable-tests --enable-benchmarks
      - name: Build dependencies
        run: cabal build --only-dependencies --enable-tests --enable-benchmarks
      - name: Build project
        run: cabal build --enable-tests --enable-benchmarks
      - name: Run documentation build
        run: cabal haddock
  test:
    runs-on: docker
    container:
      image: elland/haddock2:latest
    needs: build
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Cache Cabal packages
        uses: actions/cache@v4
        with:
          path: |
            ~/.cabal/packages
            ~/.cabal/store
            dist-newstyle
          key: ${{ runner.os }}-haskell-9.10-cabal-${{ hashFiles('**/*.cabal', '**/cabal.project') }}
          restore-keys: |
            ${{ runner.os }}-haskell-9.10-cabal-
      - name: Update Cabal package index
        run: cabal update
      - name: Configure project
        run: cabal configure --enable-tests --enable-benchmarks
      - name: Build dependencies
        run: cabal build --only-dependencies --enable-tests --enable-benchmarks
      - name: Build project
        run: cabal build --enable-tests --enable-benchmarks
      - name: Run tests
        run: cabal test --test-show-details=direct
  fourmolu:
    runs-on: docker
    container:
      image: elland/haddock2:latest
    needs: build
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Run fourmolu
        run: |
          find src test app -name "*.hs" -exec fourmolu --check-idempotence {} \; 2>/dev/null || true
          find src test app -name "*.hs" -exec fourmolu --mode check {} \;
  hlint:
    runs-on: docker
    container:
      image: elland/haddock2:latest
    needs: build
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Run hlint
        run: |
          if [ -d src ]; then hlint src/; fi
          if [ -d test ]; then hlint test/; fi  
          if [ -d app ]; then hlint app/; fi
--- a/.git-blame-ignore-revs
+++ b/.git-blame-ignore-revs
@ -1,2 +0,0 @@
 # Fourmolu
 9998ac92263127b05fd1eb607f3b7740c69d3a58
--- a/36
+++ b/36
@ -1,36 +0,0 @@
 FROM haskell:9.10.2-bullseye AS builder
 RUN apt-get update && apt-get install -y curl git && rm -rf /var/lib/apt/lists/*
 RUN curl -fsSL https://deb.nodesource.com/setup_22.x | bash - && \
    apt-get install -y nodejs && \
    rm -rf /var/lib/apt/lists/*
 RUN cabal update && \
    cabal install --install-method=copy --installdir=/usr/local/bin \
    fourmolu hlint cabal-gild
 WORKDIR /workspace
 FROM haskell:9.10.2-bullseye
 RUN apt-get update && apt-get install -y \
    libgmp10 \
    curl \
    && rm -rf /var/lib/apt/lists/*
 RUN curl -fsSL https://deb.nodesource.com/setup_22.x | bash - && \
    apt-get install -y nodejs && \
    rm -rf /var/lib/apt/lists/*
 RUN cabal update
 COPY --from=builder /usr/local/bin/cabal /usr/local/bin/
 COPY --from=builder /usr/local/bin/fourmolu /usr/local/bin/
 COPY --from=builder /usr/local/bin/hlint /usr/local/bin/
 COPY --from=builder /usr/local/bin/cabal-gild /usr/local/bin/
 WORKDIR /workspace
 CMD [ "bash" ]
--- a/44
+++ b/44
@ -1,44 +0,0 @@
 .PHONY: help
 help: ## Show this help
 	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
 .PHONY: build
 build: ## Build the project
 	cabal build
 .PHONY: test
 test: ## Run tests
 	cabal test --test-show-details=direct
 .PHONY: clean
 clean: ## Clean build artifacts
 	cabal clean
 .PHONY: fourmolu
 fourmolu: ## Format Haskell code
 	find . -type f -name "*.hs" ! -path "./dist-newstyle/*" -exec fourmolu -i {} +
 .PHONY: fourmolu-check
 fourmolu-check: ## Check if code is formatted
 	find . -type f -name "*.hs" ! -path "./dist-newstyle/*" -exec fourmolu --mode check {} \;
 .PHONY: lint
 lint: ## Run hlint
 	hlint src test app
 .PHONY: cabal-gild
 cabal-gild: ## Format cabal file
 	cabal-gild --io=haddock2.cabal
 .PHONY: format
 format: fourmolu cabal-gild ## Run all formatters
 .PHONY: check
 check: fourmolu-check lint ## Run all checks (CI-style)
 .PHONY: ci
 ci: build test check ## Run full CI pipeline locally
 .PHONY: docs
 docs: ## Generate documentation
 	cabal haddock --haddock-hyperlink-source
--- a/fourmolu.yaml
+++ b/fourmolu.yaml
@ -1,72 +0,0 @@
 # Number of spaces per indentation step
 indentation: 2
 # Max line length for automatic line breaking
 column-limit: none
 # Styling of arrows in type signatures (choices: trailing, leading, or leading-args)
 function-arrows: trailing
 # How to place commas in multi-line lists, records, etc. (choices: leading or trailing)
 comma-style: leading
 # Styling of import/export lists (choices: leading, trailing, or diff-friendly)
 import-export-style: diff-friendly
 # Rules for grouping import declarations
 import-grouping: legacy
 # Whether to full-indent or half-indent 'where' bindings past the preceding body
 indent-wheres: false
 # Whether to leave a space before an opening record brace
 record-brace-space: false
 # Number of spaces between top-level declarations
 newlines-between-decls: 1
 # How to print Haddock comments (choices: single-line, multi-line, or multi-line-compact)
 haddock-style: multi-line
 # How to print module docstring
 haddock-style-module: null
 # Styling of let blocks (choices: auto, inline, newline, or mixed)
 let-style: auto
 # How to align the 'in' keyword with respect to the 'let' keyword (choices: left-align, right-align, or no-space)
 in-style: right-align
 # Whether to put parentheses around a single constraint (choices: auto, always, or never)
 single-constraint-parens: always
 # Whether to put parentheses around a single deriving class (choices: auto, always, or never)
 single-deriving-parens: always
 # Whether to sort constraints
 sort-constraints: false
 # Whether to sort derived classes
 sort-derived-classes: false
 # Whether to sort deriving clauses
 sort-deriving-clauses: false
 # Whether to place section operators (those that are infixr 0, such as $) in trailing position, continuing the expression indented below
 trailing-section-operators: true
 # Output Unicode syntax (choices: detect, always, or never)
 unicode: never
 # Give the programmer more choice on where to insert blank lines
 respectful: true
 # Fixity information for operators
 fixities: []
 # Module reexports Fourmolu should know about
 reexports: []
 # Modules defined by the current Cabal package for import grouping
 local-modules: []
--- a/haddock2.cabal
+++ b/haddock2.cabal
@ -46,10 +46,10 @@ test-suite haddock2-test
  type: exitcode-stdio-1.0
  main-is: Spec.hs
  build-depends:
    parsec ^>=3.1.18.0,
    base >=4.20.1.0,
    haddock2:{haddock2-lib},
    hspec ^>=2.11.0,
    parsec ^>=3.1.18.0,
    text ^>=2.1.2,
  hs-source-dirs: test
--- a/npins/default.nix
+++ b/npins/default.nix
@ -1,146 +0,0 @@
 /*
  This file is provided under the MIT licence:
  Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
  The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
  THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
 # Generated by npins. Do not modify; will be overwritten regularly
 let
  data = builtins.fromJSON (builtins.readFile ./sources.json);
  version = data.version;
  # https://github.com/NixOS/nixpkgs/blob/0258808f5744ca980b9a1f24fe0b1e6f0fecee9c/lib/lists.nix#L295
  range =
    first: last: if first > last then [ ] else builtins.genList (n: first + n) (last - first + 1);
  # https://github.com/NixOS/nixpkgs/blob/0258808f5744ca980b9a1f24fe0b1e6f0fecee9c/lib/strings.nix#L257
  stringToCharacters = s: map (p: builtins.substring p 1 s) (range 0 (builtins.stringLength s - 1));
  # https://github.com/NixOS/nixpkgs/blob/0258808f5744ca980b9a1f24fe0b1e6f0fecee9c/lib/strings.nix#L269
  stringAsChars = f: s: concatStrings (map f (stringToCharacters s));
  concatMapStrings = f: list: concatStrings (map f list);
  concatStrings = builtins.concatStringsSep "";
  # If the environment variable NPINS_OVERRIDE_${name} is set, then use
  # the path directly as opposed to the fetched source.
  # (Taken from Niv for compatibility)
  mayOverride =
    name: path:
    let
      envVarName = "NPINS_OVERRIDE_${saneName}";
      saneName = stringAsChars (c: if (builtins.match "[a-zA-Z0-9]" c) == null then "_" else c) name;
      ersatz = builtins.getEnv envVarName;
    in
    if ersatz == "" then
      path
    else
      # this turns the string into an actual Nix path (for both absolute and
      # relative paths)
      builtins.trace "Overriding path of \"${name}\" with \"${ersatz}\" due to set \"${envVarName}\"" (
        if builtins.substring 0 1 ersatz == "/" then
          /. + ersatz
        else
          /. + builtins.getEnv "PWD" + "/${ersatz}"
      );
  mkSource =
    name: spec:
    assert spec ? type;
    let
      path =
        if spec.type == "Git" then
          mkGitSource spec
        else if spec.type == "GitRelease" then
          mkGitSource spec
        else if spec.type == "PyPi" then
          mkPyPiSource spec
        else if spec.type == "Channel" then
          mkChannelSource spec
        else if spec.type == "Tarball" then
          mkTarballSource spec
        else
          builtins.throw "Unknown source type ${spec.type}";
    in
    spec // { outPath = mayOverride name path; };
  mkGitSource =
    {
      repository,
      revision,
      url ? null,
      submodules,
      hash,
      branch ? null,
      ...
    }:
    assert repository ? type;
    # At the moment, either it is a plain git repository (which has an url), or it is a GitHub/GitLab repository
    # In the latter case, there we will always be an url to the tarball
    if url != null && !submodules then
      builtins.fetchTarball {
        inherit url;
        sha256 = hash; # FIXME: check nix version & use SRI hashes
      }
    else
      let
        url =
          if repository.type == "Git" then
            repository.url
          else if repository.type == "GitHub" then
            "https://github.com/${repository.owner}/${repository.repo}.git"
          else if repository.type == "GitLab" then
            "${repository.server}/${repository.repo_path}.git"
          else
            throw "Unrecognized repository type ${repository.type}";
        urlToName =
          url: rev:
          let
            matched = builtins.match "^.*/([^/]*)(\\.git)?$" url;
            short = builtins.substring 0 7 rev;
            appendShort = if (builtins.match "[a-f0-9]*" rev) != null then "-${short}" else "";
          in
          "${if matched == null then "source" else builtins.head matched}${appendShort}";
        name = urlToName url revision;
      in
      builtins.fetchGit {
        rev = revision;
        inherit name;
        # hash = hash;
        inherit url submodules;
      };
  mkPyPiSource =
    { url, hash, ... }:
    builtins.fetchurl {
      inherit url;
      sha256 = hash;
    };
  mkChannelSource =
    { url, hash, ... }:
    builtins.fetchTarball {
      inherit url;
      sha256 = hash;
    };
  mkTarballSource =
    {
      url,
      locked_url ? url,
      hash,
      ...
    }:
    builtins.fetchTarball {
      url = locked_url;
      sha256 = hash;
    };
 in
 if version == 5 then
  builtins.mapAttrs mkSource data.pins
 else
  throw "Unsupported format version ${toString version} in sources.json. Try running `npins upgrade`"
--- a/npins/sources.json
+++ b/npins/sources.json
@ -1,11 +0,0 @@
 {
  "pins": {
    "nixpkgs": {
      "type": "Channel",
      "name": "nixpkgs-unstable",
      "url": "https://releases.nixos.org/nixpkgs/nixpkgs-25.11pre868532.647e5c14cbd5/nixexprs.tar.xz",
      "hash": "0i6mgl7pm7y4ydrrll7szmv8hhxb3cyny8x1g1a8sp3g5wl3yd9g"
    }
  },
  "version": 5
 }
--- a/shell.nix
+++ b/shell.nix
@ -1,25 +0,0 @@
 let
  sources = import ./npins;
 in
 {
  pkgs ? import sources.nixpkgs { },
 }:
 pkgs.mkShell rec {
  name = "haddock2";
  packages =
    with pkgs;
    [
      haskell.packages.ghc912.ghc
      haskell.packages.ghc912.haskell-language-server
      zlib
    ]
    ++ map haskell.lib.justStaticExecutables [
      haskellPackages.cabal-gild
      haskellPackages.fourmolu
      cabal-install
    ];
  env.LD_LIBRARY_PATH = pkgs.lib.makeLibraryPath packages;
 }
--- a/src/Lexer.hs
+++ b/src/Lexer.hs
@ -1,154 +1,150 @@
 {-# LANGUAGE OverloadedStrings #-}
 module Lexer (
-  Token (..),
+    Token (..),
-  lexer,
+    lexer,
-  emphasis,
+    emphasis,
-)
+) where
 where
 import Control.Monad (mfilter, void)
 import Data.Char (ord, toLower)
 import Data.Functor (($>))
 import Data.Text (Text, intercalate)
 import Data.Text qualified as Text
-import GHC.Unicode (isAlphaNum, isControl, isDigit, isPrint, isSpace, isUpper)
+import GHC.Unicode (isAlphaNum, isControl, isPrint, isSpace, isUpper)
 import ParserMonad (Parser, initialParserState)
 import Text.Parsec
 import Text.Parsec qualified as Parsec
 import Text.Parsec.Pos (updatePosChar)
 type Located a = (SourcePos, a)
 type LocatedToken = (SourcePos, Token)
 type Lexer = Parser [LocatedToken]
 data Level
-  = One
+    = One
-  | Two
+    | Two
-  | Three
+    | Three
-  | Four
+    | Four
-  | Five
+    | Five
-  | Six
+    | Six
-  deriving (Eq, Show)
+    deriving (Eq, Show)
 data Token
-  = Token Text
+    = Token Text
-  | Anchor Text
+    | Anchor Text
-  | BirdTrack
+    | BirdTrack
-  | BoldOpen
+    | BoldOpen
-  | BoldClose
+    | BoldClose
-  | Escape
+    | Escape
-  | EmphasisOpen
+    | EmphasisOpen
-  | EmphasisClose
+    | EmphasisClose
-  | Header Level
+    | Header Level
-  | MonospaceOpen
+    | MonospaceOpen
-  | MonospaceClose
+    | MonospaceClose
-  | Newline
+    | Newline
-  | LinkOpen
+    | LinkOpen
-  | LinkClose
+    | LinkClose
-  | LabeledLinkOpen
+    | LabeledLinkOpen
-  | LabeledLinkClose
+    | LabeledLinkClose
-  | ParenOpen
+    | ParenOpen
-  | ParenClose
+    | ParenClose
-  | BracketOpen
+    | BracketOpen
-  | BracketClose
+    | BracketClose
-  | MathInlineOpen
+    | MathInlineOpen
-  | MathInlineClose
+    | MathInlineClose
-  | MathMultilineOpen
+    | MathMultilineOpen
-  | MathMultilineClose
+    | MathMultilineClose
-  | NumericEntity Int
+    | NumericEntity Int
-  | Module Text
+    | Module Text
-  | QuoteOpen
+    | QuoteOpen
-  | QuoteClose
+    | QuoteClose
-  | Space
+    | Space
-  | EOF
+    | EOF
-  deriving (Eq, Show)
+    deriving (Eq, Show)
 located :: Parser a -> Parser (SourcePos, a)
 located p = (,) <$> getPosition <*> p
 tokenise :: [Parser a] -> Parser [(SourcePos, a)]
-tokenise = mapM located
+tokenise = sequence . map located
 lexer :: String -> Either ParseError [LocatedToken]
 lexer = Parsec.runParser lexText initialParserState "input" . Text.pack
 lexText :: Parser [LocatedToken]
 lexText = go
- where
+  where
-  go = do
+    go = do
-    Parsec.optionMaybe Parsec.eof >>= \case
+        Parsec.optionMaybe Parsec.eof >>= \case
-      Just _ -> pure []
+            Just _ -> pure []
-      Nothing -> do
+            Nothing -> do
-        toks <-
+                toks <-
-          choice $
+                    choice $
-            Parsec.try
+                        Parsec.try
-              <$> [ mathMultiline
+                            <$> [ mathMultiline
-                  , mathInline
+                                , mathInline
-                  , escape -- maths go before escape to avoid mismatch
+                                , escape -- maths go before escape to avoid mismatch
-                  , headers
+                                , headers
-                  , newlineToken
+                                , newlineToken
-                  , spaceToken
+                                , spaceToken
-                  , link
+                                , link
-                  , labeledLink
+                                , labeledLink
-                  , module_
+                                , module_
-                  , anchor
+                                , anchor
-                  , numericEntity
+                                , textElement
-                  , textElement
+                                , quotes
-                  , quotes
+                                , birdTrack
-                  , birdTrack
+                                , other
-                  , other
+                                ]
-                  ]
+                rest <- go
-        rest <- go
+                pure (toks <> rest)
        pure (toks <> rest)
 -- Tokens
 textElement :: Parser [LocatedToken]
 textElement =
-  choice $
+    choice $
-    Parsec.try
+        Parsec.try
-      <$> [ emphasis
+            <$> [ emphasis
-          , bold
+                , bold
-          , monospace
+                , monospace
-          ]
+                ]
 headers :: Parser [LocatedToken]
 headers =
-  choice $
+    choice $
-    Parsec.try
+        Parsec.try
-      <$> [ header1
+            <$> [ header1
-          , header2
+                , header2
-          , header3
+                , header3
-          , header4
+                , header4
-          , header5
+                , header5
-          , header6
+                , header6
-          ]
+                ]
 anyUntil :: Parser a -> Parser Text
 anyUntil p = Text.pack <$> manyTill anyChar (lookAhead p)
 delimitedAsTuple :: Parser open -> Parser close -> Parser (Located open, LocatedToken, Located close)
 delimitedAsTuple openP closeP =
-  (,,)
+    (,,)
-    <$> located openP
+        <$> located openP
-    <*> located (Token <$> anyUntil closeP)
+        <*> located (Token <$> anyUntil closeP)
-    <*> located closeP
+        <*> located closeP
 delimited :: Parser open -> Parser close -> Token -> Token -> Parser [LocatedToken]
 delimited openP closeP openTok closeTok = asList <$> delimitedAsTuple (openTok <$ openP) (closeTok <$ closeP)
- where
+    where
-  asList (a, tok, b) = [a, tok, b]
+        asList (a, tok, b) = [a, tok, b]
 delimitedNoTrailing :: Parser open -> Parser close -> Token -> Parser [LocatedToken]
 delimitedNoTrailing openP closeP openTok = asList <$> delimitedAsTuple (openTok <$ openP) (void closeP)
- where
+    where
-  asList (a, tok, _) = [a, tok]
+        asList (a, tok, _) = [a, tok]
 delimitedSymmetric :: Parser a -> Token -> Token -> Parser [LocatedToken]
-delimitedSymmetric s = delimited s s
+delimitedSymmetric s t1 t2 = delimited s s t1 t2
 eol :: Parser ()
 eol = void "\n" <|> void "\r\n" <|> Parsec.eof
@ -174,8 +170,9 @@ header6 = delimitedNoTrailing "====== " eol (Header Six)
 -- #anchors#
 anchor :: Lexer
 anchor = do
-  x <- located $ between "#" "#" (Anchor <$> anyUntil "#")
+    x <- located $ between "#" "#" (Anchor <$> anyUntil "#")
-  pure [x]
+    pure [x]
 moduleNames :: Parser Text
 moduleNames = intercalate "." . fmap Text.pack <$> upperId `sepBy1` char '.'
@ -191,47 +188,47 @@ identifierChar = satisfy (\c -> isAlphaNum c || c == '_')
 -- "Module.Name\#anchor" -- known as "old anchor". this has been deprecated for 9 years, thanks Ben
 module_ :: Lexer
 module_ = between (char '"') (char '"') inner
- where
+  where
-  inner = do
+    inner = do
-    m <- located $ Module <$> moduleNames
+        m <- located $ Module <$> moduleNames
-    mAnchor <- optionMaybe (located $ anchorHash *> (Anchor <$> anchorText))
+        mAnchor <- optionMaybe (located $ anchorHash *> (Anchor <$> anchorText))
-    pure $ case mAnchor of
+        pure $ case mAnchor of
-      Just anc -> [m, anc]
+            Just anc -> [m, anc]
-      Nothing -> [m]
+            Nothing -> [m]
-  anchorHash :: Parser Text
+    anchorHash :: Parser Text
-  anchorHash = "#" <|> try "\\#"
+    anchorHash = "#" <|> try "\\#"
-  anchorText :: Parser Text
+    anchorText :: Parser Text
-  anchorText = Text.pack <$> many (satisfy (\c -> c /= '"' && not (isSpace c)))
+    anchorText = Text.pack <$> many (satisfy (\c -> c /= '"' && not (isSpace c)))
 linkRaw :: Lexer
 linkRaw =
-  tokenise
+    tokenise
-    [ BracketOpen <$ "["
+        [ BracketOpen <$ "["
-    , Token <$> anyUntil "]"
+        , Token <$> anyUntil "]"
-    , BracketClose <$ "]"
+        , BracketClose <$ "]"
-    , ParenOpen <$ "("
+        , ParenOpen <$ "("
-    , Token <$> anyUntil ")"
+        , Token <$> anyUntil ")"
-    , ParenClose <$ ")"
+        , ParenClose <$ ")"
-    ]
+        ]
 link :: Lexer
 link = do
-  pos <- getPosition
+    pos <- getPosition
-  l <- linkRaw
+    l <- linkRaw
-  -- register the position of the last token
+    -- register the position of the last token
-  pos' <- flip incSourceColumn (-1) <$> getPosition
+    pos' <- flip incSourceColumn (-1) <$> getPosition
-  pure $ (pos, LinkOpen) : l <> [(pos', LinkClose)]
+    pure $ (pos, LinkOpen) : l <> [(pos', LinkClose)]
 labeledLink :: Lexer
 labeledLink = do
-  open <- located $ LabeledLinkOpen <$ "<"
+    open <- located $ LabeledLinkOpen <$ "<"
-  linkRes <- linkRaw
+    linkRes <- linkRaw
-  labelRes <- located $ Token <$> anyUntil ">"
+    labelRes <- located $ Token <$> anyUntil ">"
-  close <- located $ LabeledLinkClose <$ ">"
+    close <- located $ LabeledLinkClose <$ ">"
-  pure $
+    pure $
-    open : linkRes <> [labelRes, close]
+        open : linkRes <> [ labelRes , close ]
 mathMultiline :: Lexer
 mathMultiline = delimited "\\[" "\\]" MathMultilineOpen MathMultilineClose
@ -257,49 +254,25 @@ bold = delimitedSymmetric "__" BoldOpen BoldClose
 monospace :: Lexer
 monospace = delimitedSymmetric "@" MonospaceOpen MonospaceClose
 decimal :: Parser Int
 decimal = read . Text.unpack <$> takeWhile1_ isDigit
 hexadecimal :: Parser Int
 hexadecimal = "x" *> (convert 0 . fmap (normalise . toLower) <$> many1 hexDigit)
 where
  normalise :: Char -> Int
  normalise c
    | ord '0' <= n && n <= ord '9' = n - ord '0'
    | ord 'A' <= n && n <= ord 'F' = n - ord 'A' + 10
    | ord 'a' <= n && n <= ord 'f' = n - ord 'a' + 10
    | otherwise = error "unexpected: invalid hex number"
   where
    n = ord c
  convert :: Int -> [Int] -> Int
  convert acc [] = acc
  convert acc (x : xs) = convert (acc * 16 + x) xs
 numericEntity :: Lexer
 numericEntity = do
  x <- located $ between "&#" ";" (NumericEntity <$> (hexadecimal <|> decimal))
  pure [x]
 other :: Lexer
 other = do
-  pos <- getPosition
+    pos <- getPosition
-  c <- takeWhile1_ isUnicodeAlphaNum
+    c <- takeWhile1_ isUnicodeAlphaNum
-  pure . pure $ (pos, Token c)
+    pure . pure $ (pos, Token c)
- where
+  where
-  isUnicodeAlphaNum c = isPrint c && not (isControl c) && not (isSpace c)
+    isUnicodeAlphaNum c = isPrint c && not (isControl c) && not (isSpace c)
 spaceToken :: Lexer
 spaceToken = do
-  pos <- getPosition
+    pos <- getPosition
-  _ <- many1 (char ' ')
+    _ <- many1 (char ' ')
-  pure . pure $ (pos, Space)
+    pure . pure $ (pos, Space)
 newlineToken :: Lexer
 newlineToken = do
-  pos <- getPosition
+    pos <- getPosition
-  _ <- newline
+    _ <- newline
-  pure . pure $ (pos, Newline)
+    pure . pure $ (pos, Newline)
 -------
 -- Helpers
@ -308,11 +281,11 @@ newlineToken = do
 -- | Like `takeWhile`, but unconditionally take escaped characters.
 takeWhile_ :: (Char -> Bool) -> Parser Text
 takeWhile_ p = scan p_ False
- where
+  where
-  p_ escaped c
+    p_ escaped c
-    | escaped = Just False
+        | escaped = Just False
-    | not $ p c = Nothing
+        | not $ p c = Nothing
-    | otherwise = Just (c == '\\')
+        | otherwise = Just (c == '\\')
 -- | Like 'takeWhile1', but unconditionally take escaped characters.
 takeWhile1_ :: (Char -> Bool) -> Parser Text
@ -322,20 +295,19 @@ takeWhile1_ = mfilter (not . Text.null) . takeWhile_
 function returns true.
 -}
 scan ::
-  -- | scan function
+    -- | scan function
-  (state -> Char -> Maybe state) ->
+    (state -> Char -> Maybe state) ->
-  -- | initial state
+    -- | initial state
-  state ->
+    state ->
-  Parser Text
+    Parser Text
 scan f initState = do
-  parserState@State{stateInput = input, statePos = pos} <- Parsec.getParserState
+    parserState@State{stateInput = input, statePos = pos} <- Parsec.getParserState
-
+    (remaining, finalPos, ct) <- go input initState pos 0
-  (remaining, finalPos, ct) <- go input initState pos 0
+    let newState = parserState{stateInput = remaining, statePos = finalPos}
-  let newState = parserState{stateInput = remaining, statePos = finalPos}
+    Parsec.setParserState newState $> Text.take ct input
-  Parsec.setParserState newState $> Text.take ct input
+  where
- where
+    go !input' !st !posAccum !count' = case Text.uncons input' of
-  go !input' !st !posAccum !count' = case Text.uncons input' of
+        Nothing -> pure (input', posAccum, count')
-    Nothing -> pure (input', posAccum, count')
+        Just (char', input'') -> case f st char' of
-    Just (char', input'') -> case f st char' of
+            Nothing -> pure (input', posAccum, count')
-      Nothing -> pure (input', posAccum, count')
+            Just st' -> go input'' st' (updatePosChar posAccum char') (count' + 1)
      Just st' -> go input'' st' (updatePosChar posAccum char') (count' + 1)
--- a/src/Parser/Util.hs
+++ b/src/Parser/Util.hs
@ -13,9 +13,7 @@ import Text.Parsec.Pos (updatePosChar)
 Return everything consumed except for the end pattern itself.
 -}
 takeUntil :: Text -> Parser Text
-takeUntil end_ =
+takeUntil end_ = Text.dropEnd (Text.length end_) <$> requireEnd (scan p (False, end)) >>= gotSome
  requireEnd (scan p (False, end))
    >>= gotSome . Text.dropEnd (Text.length end_)
 where
  end = Text.unpack end_
--- a/src/Types.hs
+++ b/src/Types.hs
@ -9,8 +9,6 @@ module Types (
 )
 where
 import Data.Foldable (fold)
 newtype Document = Document
  { meta :: Meta
  }
@ -30,7 +28,6 @@ data Since = Since
 -- Could have a better type?
 type Version = [Int]
 type Package = String
 data DocMarkup mod id
@ -54,78 +51,78 @@ data DocMarkup mod id
  | -- | Bold __bold text__
    DocBold (DocMarkup mod id)
  | {- | Unordered lists
-     * this
+      * this
-     or
+      or
-     - this
+      - this
    -}
    DocUnorderedList [DocMarkup mod id]
  | {- | Ordered lists
-     1. this
+      1. this
-     or
+      or
-     (1) this
+      (1) this
    -}
    DocOrderedList [(Int, DocMarkup mod id)]
  | {- | Definition lists
-     [term] a term
+      [term] a term
-     [another term] another definition
+      [another term] another definition
    -}
    DocDefinitionList [(DocMarkup mod id, DocMarkup mod id)]
  | {- | Code blocks
-     @
+      @
-     a code block in here
+      a code block in here
-     with multiple lines
+      with multiple lines
-     @
+      @
-     Or with bird tracks:
+      Or with bird tracks:
-     > some code
+      > some code
-     > goes here
+      > goes here
    -}
    DocCodeBlock (DocMarkup mod id)
  | {- | Hyperlinks
-     __marked__:
+      __marked__:
-     <http://example.com>
+      <http://example.com>
-     <http://example.com label text>
+      <http://example.com label text>
-     __Auto-detected URLs__:
+      __Auto-detected URLs__:
-     http://example.com
+      http://example.com
-     https://example.com
+      https://example.com
-     ftp://example.com
+      ftp://example.com
-     __Markdown style__
+      __Markdown style__
-     [link text](http://example.com)
+      [link text](http://example.com)
-     [link text]("Module.Name")
+      [link text]("Module.Name")
    -}
    DocHyperlink (Hyperlink (DocMarkup mod id))
  | {- | Pictures
-       <<image.png>>
+        <<image.png>>
-       <<image.png title text>>
+        <<image.png title text>>
-       __Markdown Images__
+        __Markdown Images__
-       ![alt text](image.png)
+        ![alt text](image.png)
    -}
    DocPicture Picture
  | {- | Inline math expressions
-     \(mathematical expression\)
+      \(mathematical expression\)
    -}
    DocMathInline String
  | {- | Math multiline display
-     \[
+    \[
-     mathematical expression
+      mathematical expression
-     in multiple lines
+      in multiple lines
-     \]
+    \]
    -}
    DocMathDisplay String
  | {- | Anchors, no spaces allowed
-     #anchor-name#
+      #anchor-name#
    -}
    DocAnchor String
  | {- | Property descriptions
-     prop> property description
+      prop> property description
    -}
    DocProperty String
  | {- | Examples
-     >>> expression
+      >>> expression
-     result line 1
+      result line 1
-     result line 2
+      result line 2
    -}
    DocExamples [Example]
  | -- | Header
@ -139,7 +136,7 @@ instance Semigroup (DocMarkup mod id) where
 instance Monoid (DocMarkup mod id) where
  mempty = DocEmpty
-  mconcat = fold
+  mconcat = foldr (<>) mempty
 data ModuleLink id = ModuleLink
  { name :: String
--- a/test/Spec.hs
+++ b/test/Spec.hs
@ -1,42 +1,42 @@
 {-# LANGUAGE OverloadedStrings #-}
 {-# OPTIONS_GHC -Wno-orphans #-}
-import Data.String (IsString (..))
+import Test.Hspec
-import Data.Text (Text)
+
 import GHC.Stack
 import Identifier (Identifier)
 import Lexer
 import Parser
 import Types
-import Test.Hspec
+import Data.String (IsString (..))
 import Data.Text (Text)
 import Text.Parsec.Pos
 import GHC.Stack
 main :: IO ()
 main = hspec $ do
-  describe "Lexer" do
+    describe "Lexer" do
-    describe "minimal" do
+        describe "minimal" do
-      it "handles unicode" unicode
+            it "handles unicode" unicode
-      it "escapes" escaping
+            it "escapes" escaping
-      it "maths" math
+            it "maths" math
-      it "anchors" anchor
+            it "anchors" anchor
-      it "space chars" space
+            it "space chars" space
-      it "bare string" someString
+            it "bare string" someString
-      it "emphasis" emphatic
+            it "emphasis" emphatic
-      it "monospace" monospace
+            it "monospace" monospace
-      it "labeled link" labeledLink
+            it "labeled link" labeledLink
-      it "markdown link" link
+            it "markdown link" link
-      it "bird tracks" birdTracks
+            it "bird tracks" birdTracks
-      it "module names" modules
+            it "module names" modules
-      it "quotes" quotes
+            it "quotes" quotes
-      it "numeric entity" numericEntity
+            it "ignores nesting" ignoreNesting
      it "ignores nesting" ignoreNesting
-  describe "Parser" do
+    describe "Parser" do
-    it "Bold" do
+        it "Bold" do
-      "__bold__" `shouldParseTo` DocBold (DocString "bold")
+            "__bold__" `shouldParseTo` (DocBold (DocString "bold"))
-    it "Emphasis" do
+        it "Emphasis" do
-      "/emphasis/" `shouldParseTo` DocEmphasis (DocString "emphasis")
+            "/emphasis/" `shouldParseTo` (DocEmphasis (DocString "emphasis"))
 ------------
 -- Tests
@ -44,150 +44,137 @@ main = hspec $ do
 modules :: Expectation
 modules = do
-  "\"MyModule.Name\""
+    "\"MyModule.Name\""
-    `shouldLexTo` [ (1, 2, Module "MyModule.Name")
+        `shouldLexTo` [ (1, 2, Module "MyModule.Name")
-                  ]
+                      ]
-  "\"OtherModule.Name#myAnchor\""
+    "\"OtherModule.Name#myAnchor\""
-    `shouldLexTo` [ (1, 2, Module "OtherModule.Name")
+        `shouldLexTo` [ (1, 2, Module "OtherModule.Name")
-                  , (1, 18, Anchor "myAnchor")
+                      , (1, 18, Anchor "myAnchor")
-                  ]
+                      ]
  "\"OtherModule.Name\\#myAnchor\""
    `shouldLexTo` [ (1, 2, Module "OtherModule.Name")
                  , (1, 18, Anchor "myAnchor")
                  ]
    "\"OtherModule.Name\\#myAnchor\""
        `shouldLexTo` [ (1, 2, Module "OtherModule.Name")
                      , (1, 18, Anchor "myAnchor")
                      ]
 link :: Expectation
 link =
-  "[link to](http://some.website)"
+    "[link to](http://some.website)"
-    `shouldLexTo` [ (1, 1, LinkOpen)
+        `shouldLexTo` [ (1, 1, LinkOpen)
-                  , (1, 1, BracketOpen)
+                      , (1, 1, BracketOpen)
-                  , (1, 2, Token "link to")
+                      , (1, 2, Token "link to")
-                  , (1, 9, BracketClose)
+                      , (1, 9, BracketClose)
-                  , (1, 10, ParenOpen)
+                      , (1, 10, ParenOpen)
-                  , (1, 11, Token "http://some.website")
+                      , (1, 11, Token "http://some.website")
-                  , (1, 30, ParenClose)
+                      , (1, 30, ParenClose)
-                  , (1, 30, LinkClose)
+                      , (1, 30, LinkClose)
-                  ]
+                      ]
 labeledLink :: Expectation
 labeledLink =
-  "<[link here](http://to.here) label>"
+    "<[link here](http://to.here) label>"
-    `shouldLexTo` [ (1, 1, LabeledLinkOpen)
+        `shouldLexTo` [ (1, 1, LabeledLinkOpen)
-                  , (1, 2, BracketOpen)
+                      , (1, 2, BracketOpen)
-                  , (1, 3, Token "link here")
+                      , (1, 3, Token "link here")
-                  , (1, 12, BracketClose)
+                      , (1, 12, BracketClose)
-                  , (1, 13, ParenOpen)
+                      , (1, 13, ParenOpen)
-                  , (1, 14, Token "http://to.here")
+                      , (1, 14, Token "http://to.here")
-                  , (1, 28, ParenClose)
+                      , (1, 28, ParenClose)
-                  , (1, 29, Token " label")
+                      , (1, 29, Token " label")
-                  , (1, 35, LabeledLinkClose)
+                      , (1, 35, LabeledLinkClose)
-                  ]
+                      ]
 anchor :: Expectation
 anchor =
-  "#myAnchor#"
+    "#myAnchor#"
-    `shouldLexTo` [ (1, 1, Anchor "myAnchor")
+        `shouldLexTo` [ (1, 1, Anchor "myAnchor")
-                  ]
+                      ]
 math :: IO ()
 math = do
-  "\\[some math\\]"
+    "\\[some math\\]"
-    `shouldLexTo` [ (1, 1, MathMultilineOpen)
+        `shouldLexTo` [ (1, 1, MathMultilineOpen)
-                  , (1, 3, Token "some math")
+                      , (1, 3, Token "some math")
-                  , (1, 12, MathMultilineClose)
+                      , (1, 12, MathMultilineClose)
-                  ]
+                      ]
-  "\\(other maths\\)"
+    "\\(other maths\\)"
-    `shouldLexTo` [ (1, 1, MathInlineOpen)
+        `shouldLexTo` [ (1, 1, MathInlineOpen)
-                  , (1, 3, Token "other maths")
+                      , (1, 3, Token "other maths")
-                  , (1, 14, MathInlineClose)
+                      , (1, 14, MathInlineClose)
-                  ]
+                      ]
 escaping :: Expectation
 escaping = do
-  "\\("
+    "\\("
-    `shouldLexTo` [ (1, 1, Escape)
+        `shouldLexTo` [ (1, 1, Escape)
-                  , (1, 2, Token "(")
+                      , (1, 2, Token "(")
-                  ]
+                      ]
-  "\\(\r\n"
+    "\\(\r\n"
-    `shouldLexTo` [ (1, 1, Escape)
+        `shouldLexTo` [ (1, 1, Escape)
-                  , (1, 2, Token "(")
+                      , (1, 2, Token "(")
-                  ]
+                      ]
 unicode :: Expectation
 unicode =
-  "ドラゴンクエストの冒険者🐉"
+    "ドラゴンクエストの冒険者🐉"
-    `shouldLexTo` [ (1, 1, Token "ドラゴンクエストの冒険者🐉")
+        `shouldLexTo` [ (1, 1, Token "ドラゴンクエストの冒険者🐉")
-                  ]
+                      ]
 ignoreNesting :: Expectation
 ignoreNesting =
-  ">/foo/"
+    ">/foo/"
-    `shouldLexTo` [ (1, 1, Token ">/foo/")
+        `shouldLexTo` [ (1, 1, Token ">/foo/")
-                  ]
+                      ]
 birdTracks :: Expectation
 birdTracks =
-  ">> code"
+    ">> code"
-    `shouldLexTo` [ (1, 1, BirdTrack)
+        `shouldLexTo` [ (1, 1, BirdTrack)
-                  , (1, 4, Token "code")
+                      , (1, 4, Token "code")
-                  ]
+                      ]
 quotes :: Expectation
 quotes =
-  "\"quoted\""
+    "\"quoted\""
-    `shouldLexTo` [ (1, 1, QuoteOpen)
+        `shouldLexTo` [ (1, 1, QuoteOpen)
-                  , (1, 2, Token "quoted")
+                      , (1, 2, Token "quoted")
-                  , (1, 8, QuoteClose)
+                      , (1, 8, QuoteClose)
-                  ]
+                      ]
 space :: Expectation
 space = do
-  "\n "
+    "\n "
-    `shouldLexTo` [ (1, 1, Newline)
+        `shouldLexTo` [ (1, 1, Newline)
-                  , (2, 1, Space)
+                      , (2, 1, Space)
-                  ]
+                      ]
-  " \n"
+    " \n"
-    `shouldLexTo` [ (1, 1, Space)
+        `shouldLexTo` [ (1, 1, Space)
-                  , (1, 2, Newline)
+                      , (1, 2, Newline)
-                  ]
+                      ]
 numericEntity :: Expectation
 numericEntity = do
  "&#65; &#955;"
    `shouldLexTo` [ (1, 1, NumericEntity 65)
                  , (1, 6, Space)
                  , (1, 7, NumericEntity 955) -- lambda
                  ]
  -- Hex
  "&#x65;"
    `shouldLexTo` [ (1, 1, NumericEntity 101)
                  ]
 monospace :: Expectation
 monospace =
-  "@mono@"
+    "@mono@"
-    `shouldLexTo` [ (1, 1, MonospaceOpen)
+        `shouldLexTo` [ (1, 1, MonospaceOpen)
-                  , (1, 2, Token "mono")
+                      , (1, 2, Token "mono")
-                  , (1, 6, MonospaceClose)
+                      , (1, 6, MonospaceClose)
-                  ]
+                      ]
 emphatic :: Expectation
 emphatic =
-  "/emphatic/"
+    "/emphatic/"
-    `shouldLexTo` [ (1, 1, EmphasisOpen)
+        `shouldLexTo` [ (1, 1, EmphasisOpen)
-                  , (1, 2, Token "emphatic")
+                      , (1, 2, Token "emphatic")
-                  , (1, 10, EmphasisClose)
+                      , (1, 10, EmphasisClose)
-                  ]
+                      ]
 someString :: Expectation
 someString =
-  "some string"
+    "some string"
-    `shouldLexTo` [ (1, 1, Token "some")
+        `shouldLexTo` [ (1, 1, Token "some")
-                  , (1, 5, Space)
+                      , (1, 5, Space)
-                  , (1, 6, Token "string")
+                      , (1, 6, Token "string")
-                  ]
+                      ]
 --------------
 -- Helpers
@ -196,16 +183,16 @@ someString =
 type Doc id = DocMarkup () id
 instance IsString (Doc String) where
-  fromString = DocString
+    fromString = DocString
 shouldLexTo :: String -> [(Int, Int, Token)] -> Expectation
 shouldLexTo input expected =
-  withFrozenCallStack $
+   withFrozenCallStack $
    case lexer input of
-      Right tokens -> do
+        Right tokens -> do
-        let actual = map (\(pos, tok) -> (sourceLine pos, sourceColumn pos, tok)) tokens
+            let actual = map (\(pos, tok) -> (sourceLine pos, sourceColumn pos, tok)) tokens
-        actual `shouldBe` expected
+            actual `shouldBe` expected
-      Left err -> expectationFailure $ "Parse error: " <> show err
+        Left err -> expectationFailure $ "Parse error: " <> show err
 shouldParseTo :: Text -> DocMarkup mod Identifier -> Expectation
 shouldParseTo input ast = parseText input `shouldBe` ast
Author	SHA1	Message	Date
Léana 江	970b658926	chore(lexer): clean up	2025-09-24 22:32:52 +08:00
Léana 江	2597e693f1	ref(lexer): simplify labeledLink	2025-09-24 22:31:59 +08:00
Léana 江	29c015b793	style(lexer): make binding naming consistent	2025-09-24 22:25:45 +08:00
Léana 江	326c7b681c	fix(lexer): old anchor is only used in moduleName	2025-09-24 22:20:32 +08:00
Léana 江	c4d59d3236	ref(lexer): rename MathsBracket -> MathMultiline	2025-09-24 21:35:21 +08:00
Léana 江	6ec47dad04	ref(lexer): rename MathParen -> MathInline	2025-09-24 21:31:26 +08:00
Léana 江	f3b3b08919	style(lexer): use "open" "close" in the type	2025-09-24 21:28:17 +08:00
Léana 江	6c0b4a4288	doc(lexer): explain the use of incSourceColumn I think it is clearer to phrase it this way so it is clear that we are not unconsuming (i.e. changing the state of the parser).	2025-09-24 21:25:46 +08:00
Léana 江	75c4817166	style(lexer): pluralize moduleNames parser	2025-09-24 21:25:46 +08:00
Léana 江	7ceb9b0277	ref(moduleName): break into multiple smaller functions upperId has been changed to only use isUpper because an non alphabetical character would be false anyway	2025-09-24 21:25:46 +08:00
Léana 江	368e5bc9a0	ref(lexer): simplify anchor	2025-09-24 21:25:46 +08:00
Léana 江	d6087ec3d6	ref(lexer): simplify delimited logic	2025-09-24 21:25:46 +08:00
Léana 江	fdb9070e99	fix(lexer): handle crlf in newline Do we support windows 🤔	2025-09-24 21:25:46 +08:00
		`@ -1,2 +0,0 @@`
			`# Fourmolu`
			`9998ac92263127b05fd1eb607f3b7740c69d3a58`