Compare commits

..

1 commit

Author SHA1 Message Date
21241d44ed
test: freeze the callstack for better failure messages 2025-09-24 17:07:02 +08:00
16 changed files with 378 additions and 847 deletions

1
.envrc
View file

@ -1 +0,0 @@
use nix

View file

@ -1,2 +0,0 @@
runs-on: self-hosted

View file

@ -1,99 +0,0 @@
name: Haskell CI
on:
pull_request:
branches:
- dev
- main
push:
branches:
- main
jobs:
build:
runs-on: docker
container:
image: elland/haddock2:latest
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Check versions
run: |
ghc --version
cabal --version
node --version
- name: Cache Cabal packages
uses: actions/cache@v4
with:
path: |
~/.cabal/packages
~/.cabal/store
dist-newstyle
key: ${{ runner.os }}-haskell-9.10-cabal-${{ hashFiles('**/*.cabal', '**/cabal.project') }}
restore-keys: |
${{ runner.os }}-haskell-9.10-cabal-
- name: Update Cabal package index
run: cabal update
- name: Configure project
run: cabal configure --enable-tests --enable-benchmarks
- name: Build dependencies
run: cabal build --only-dependencies --enable-tests --enable-benchmarks
- name: Build project
run: cabal build --enable-tests --enable-benchmarks
- name: Run documentation build
run: cabal haddock
test:
runs-on: docker
container:
image: elland/haddock2:latest
needs: build
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Cache Cabal packages
uses: actions/cache@v4
with:
path: |
~/.cabal/packages
~/.cabal/store
dist-newstyle
key: ${{ runner.os }}-haskell-9.10-cabal-${{ hashFiles('**/*.cabal', '**/cabal.project') }}
restore-keys: |
${{ runner.os }}-haskell-9.10-cabal-
- name: Update Cabal package index
run: cabal update
- name: Configure project
run: cabal configure --enable-tests --enable-benchmarks
- name: Build dependencies
run: cabal build --only-dependencies --enable-tests --enable-benchmarks
- name: Build project
run: cabal build --enable-tests --enable-benchmarks
- name: Run tests
run: cabal test --test-show-details=direct
fourmolu:
runs-on: docker
container:
image: elland/haddock2:latest
needs: build
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Run fourmolu
run: |
find src test app -name "*.hs" -exec fourmolu --check-idempotence {} \; 2>/dev/null || true
find src test app -name "*.hs" -exec fourmolu --mode check {} \;
hlint:
runs-on: docker
container:
image: elland/haddock2:latest
needs: build
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Run hlint
run: |
if [ -d src ]; then hlint src/; fi
if [ -d test ]; then hlint test/; fi
if [ -d app ]; then hlint app/; fi

View file

@ -1,2 +0,0 @@
# Fourmolu
9998ac92263127b05fd1eb607f3b7740c69d3a58

View file

@ -1,36 +0,0 @@
FROM haskell:9.10.2-bullseye AS builder
RUN apt-get update && apt-get install -y curl git && rm -rf /var/lib/apt/lists/*
RUN curl -fsSL https://deb.nodesource.com/setup_22.x | bash - && \
apt-get install -y nodejs && \
rm -rf /var/lib/apt/lists/*
RUN cabal update && \
cabal install --install-method=copy --installdir=/usr/local/bin \
fourmolu hlint cabal-gild
WORKDIR /workspace
FROM haskell:9.10.2-bullseye
RUN apt-get update && apt-get install -y \
libgmp10 \
curl \
&& rm -rf /var/lib/apt/lists/*
RUN curl -fsSL https://deb.nodesource.com/setup_22.x | bash - && \
apt-get install -y nodejs && \
rm -rf /var/lib/apt/lists/*
RUN cabal update
COPY --from=builder /usr/local/bin/cabal /usr/local/bin/
COPY --from=builder /usr/local/bin/fourmolu /usr/local/bin/
COPY --from=builder /usr/local/bin/hlint /usr/local/bin/
COPY --from=builder /usr/local/bin/cabal-gild /usr/local/bin/
WORKDIR /workspace
CMD [ "bash" ]

View file

@ -12,7 +12,7 @@ bold ::= '__' text_no_newline '__'
monospace ::= '@' text_content '@' monospace ::= '@' text_content '@'
link ::= module_link | hyperlink | markdown_link link ::= module_link | hyperlink | markdown_link
module_link ::= '"' module_name ( ('#' | '\#') anchor_name )? '"' module_link ::= '"' module_name ( '#' anchor_name )? '"'
hyperlink ::= '<' url ( ' ' link_text )? '>' hyperlink ::= '<' url ( ' ' link_text )? '>'
markdown_link ::= '[' link_text '](' ( url | module_link ) ')' markdown_link ::= '[' link_text '](' ( url | module_link ) ')'

View file

@ -1,44 +0,0 @@
.PHONY: help
help: ## Show this help
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
.PHONY: build
build: ## Build the project
cabal build
.PHONY: test
test: ## Run tests
cabal test --test-show-details=direct
.PHONY: clean
clean: ## Clean build artifacts
cabal clean
.PHONY: fourmolu
fourmolu: ## Format Haskell code
find . -type f -name "*.hs" ! -path "./dist-newstyle/*" -exec fourmolu -i {} +
.PHONY: fourmolu-check
fourmolu-check: ## Check if code is formatted
find . -type f -name "*.hs" ! -path "./dist-newstyle/*" -exec fourmolu --mode check {} \;
.PHONY: lint
lint: ## Run hlint
hlint src test app
.PHONY: cabal-gild
cabal-gild: ## Format cabal file
cabal-gild --io=haddock2.cabal
.PHONY: format
format: fourmolu cabal-gild ## Run all formatters
.PHONY: check
check: fourmolu-check lint ## Run all checks (CI-style)
.PHONY: ci
ci: build test check ## Run full CI pipeline locally
.PHONY: docs
docs: ## Generate documentation
cabal haddock --haddock-hyperlink-source

View file

@ -1,72 +0,0 @@
# Number of spaces per indentation step
indentation: 2
# Max line length for automatic line breaking
column-limit: none
# Styling of arrows in type signatures (choices: trailing, leading, or leading-args)
function-arrows: trailing
# How to place commas in multi-line lists, records, etc. (choices: leading or trailing)
comma-style: leading
# Styling of import/export lists (choices: leading, trailing, or diff-friendly)
import-export-style: diff-friendly
# Rules for grouping import declarations
import-grouping: legacy
# Whether to full-indent or half-indent 'where' bindings past the preceding body
indent-wheres: false
# Whether to leave a space before an opening record brace
record-brace-space: false
# Number of spaces between top-level declarations
newlines-between-decls: 1
# How to print Haddock comments (choices: single-line, multi-line, or multi-line-compact)
haddock-style: multi-line
# How to print module docstring
haddock-style-module: null
# Styling of let blocks (choices: auto, inline, newline, or mixed)
let-style: auto
# How to align the 'in' keyword with respect to the 'let' keyword (choices: left-align, right-align, or no-space)
in-style: right-align
# Whether to put parentheses around a single constraint (choices: auto, always, or never)
single-constraint-parens: always
# Whether to put parentheses around a single deriving class (choices: auto, always, or never)
single-deriving-parens: always
# Whether to sort constraints
sort-constraints: false
# Whether to sort derived classes
sort-derived-classes: false
# Whether to sort deriving clauses
sort-deriving-clauses: false
# Whether to place section operators (those that are infixr 0, such as $) in trailing position, continuing the expression indented below
trailing-section-operators: true
# Output Unicode syntax (choices: detect, always, or never)
unicode: never
# Give the programmer more choice on where to insert blank lines
respectful: true
# Fixity information for operators
fixities: []
# Module reexports Fourmolu should know about
reexports: []
# Modules defined by the current Cabal package for import grouping
local-modules: []

View file

@ -46,10 +46,10 @@ test-suite haddock2-test
type: exitcode-stdio-1.0 type: exitcode-stdio-1.0
main-is: Spec.hs main-is: Spec.hs
build-depends: build-depends:
parsec ^>=3.1.18.0,
base >=4.20.1.0, base >=4.20.1.0,
haddock2:{haddock2-lib}, haddock2:{haddock2-lib},
hspec ^>=2.11.0, hspec ^>=2.11.0,
parsec ^>=3.1.18.0,
text ^>=2.1.2, text ^>=2.1.2,
hs-source-dirs: test hs-source-dirs: test

View file

@ -1,146 +0,0 @@
/*
This file is provided under the MIT licence:
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the Software), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
# Generated by npins. Do not modify; will be overwritten regularly
let
data = builtins.fromJSON (builtins.readFile ./sources.json);
version = data.version;
# https://github.com/NixOS/nixpkgs/blob/0258808f5744ca980b9a1f24fe0b1e6f0fecee9c/lib/lists.nix#L295
range =
first: last: if first > last then [ ] else builtins.genList (n: first + n) (last - first + 1);
# https://github.com/NixOS/nixpkgs/blob/0258808f5744ca980b9a1f24fe0b1e6f0fecee9c/lib/strings.nix#L257
stringToCharacters = s: map (p: builtins.substring p 1 s) (range 0 (builtins.stringLength s - 1));
# https://github.com/NixOS/nixpkgs/blob/0258808f5744ca980b9a1f24fe0b1e6f0fecee9c/lib/strings.nix#L269
stringAsChars = f: s: concatStrings (map f (stringToCharacters s));
concatMapStrings = f: list: concatStrings (map f list);
concatStrings = builtins.concatStringsSep "";
# If the environment variable NPINS_OVERRIDE_${name} is set, then use
# the path directly as opposed to the fetched source.
# (Taken from Niv for compatibility)
mayOverride =
name: path:
let
envVarName = "NPINS_OVERRIDE_${saneName}";
saneName = stringAsChars (c: if (builtins.match "[a-zA-Z0-9]" c) == null then "_" else c) name;
ersatz = builtins.getEnv envVarName;
in
if ersatz == "" then
path
else
# this turns the string into an actual Nix path (for both absolute and
# relative paths)
builtins.trace "Overriding path of \"${name}\" with \"${ersatz}\" due to set \"${envVarName}\"" (
if builtins.substring 0 1 ersatz == "/" then
/. + ersatz
else
/. + builtins.getEnv "PWD" + "/${ersatz}"
);
mkSource =
name: spec:
assert spec ? type;
let
path =
if spec.type == "Git" then
mkGitSource spec
else if spec.type == "GitRelease" then
mkGitSource spec
else if spec.type == "PyPi" then
mkPyPiSource spec
else if spec.type == "Channel" then
mkChannelSource spec
else if spec.type == "Tarball" then
mkTarballSource spec
else
builtins.throw "Unknown source type ${spec.type}";
in
spec // { outPath = mayOverride name path; };
mkGitSource =
{
repository,
revision,
url ? null,
submodules,
hash,
branch ? null,
...
}:
assert repository ? type;
# At the moment, either it is a plain git repository (which has an url), or it is a GitHub/GitLab repository
# In the latter case, there we will always be an url to the tarball
if url != null && !submodules then
builtins.fetchTarball {
inherit url;
sha256 = hash; # FIXME: check nix version & use SRI hashes
}
else
let
url =
if repository.type == "Git" then
repository.url
else if repository.type == "GitHub" then
"https://github.com/${repository.owner}/${repository.repo}.git"
else if repository.type == "GitLab" then
"${repository.server}/${repository.repo_path}.git"
else
throw "Unrecognized repository type ${repository.type}";
urlToName =
url: rev:
let
matched = builtins.match "^.*/([^/]*)(\\.git)?$" url;
short = builtins.substring 0 7 rev;
appendShort = if (builtins.match "[a-f0-9]*" rev) != null then "-${short}" else "";
in
"${if matched == null then "source" else builtins.head matched}${appendShort}";
name = urlToName url revision;
in
builtins.fetchGit {
rev = revision;
inherit name;
# hash = hash;
inherit url submodules;
};
mkPyPiSource =
{ url, hash, ... }:
builtins.fetchurl {
inherit url;
sha256 = hash;
};
mkChannelSource =
{ url, hash, ... }:
builtins.fetchTarball {
inherit url;
sha256 = hash;
};
mkTarballSource =
{
url,
locked_url ? url,
hash,
...
}:
builtins.fetchTarball {
url = locked_url;
sha256 = hash;
};
in
if version == 5 then
builtins.mapAttrs mkSource data.pins
else
throw "Unsupported format version ${toString version} in sources.json. Try running `npins upgrade`"

View file

@ -1,11 +0,0 @@
{
"pins": {
"nixpkgs": {
"type": "Channel",
"name": "nixpkgs-unstable",
"url": "https://releases.nixos.org/nixpkgs/nixpkgs-25.11pre868532.647e5c14cbd5/nixexprs.tar.xz",
"hash": "0i6mgl7pm7y4ydrrll7szmv8hhxb3cyny8x1g1a8sp3g5wl3yd9g"
}
},
"version": 5
}

View file

@ -1,25 +0,0 @@
let
sources = import ./npins;
in
{
pkgs ? import sources.nixpkgs { },
}:
pkgs.mkShell rec {
name = "haddock2";
packages =
with pkgs;
[
haskell.packages.ghc912.ghc
haskell.packages.ghc912.haskell-language-server
zlib
]
++ map haskell.lib.justStaticExecutables [
haskellPackages.cabal-gild
haskellPackages.fourmolu
cabal-install
];
env.LD_LIBRARY_PATH = pkgs.lib.makeLibraryPath packages;
}

View file

@ -1,249 +1,262 @@
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
module Lexer ( module Lexer (
Token (..), Token (..),
lexer, lexer,
emphasis, emphasis,
) ) where
where
import Control.Monad (mfilter, void) import Control.Monad (mfilter, void)
import Data.Char (ord, toLower)
import Data.Functor (($>)) import Data.Functor (($>))
import Data.Text (Text, intercalate) import Data.Text (Text, intercalate)
import Data.Text qualified as Text import Data.Text qualified as Text
import GHC.Unicode (isAlphaNum, isControl, isDigit, isPrint, isSpace, isUpper) import GHC.Unicode (isAlpha, isAlphaNum, isControl, isPrint, isSpace, isUpper)
import ParserMonad (Parser, initialParserState) import ParserMonad (Parser, initialParserState)
import Text.Parsec import Text.Parsec
import Text.Parsec qualified as Parsec import Text.Parsec qualified as Parsec
import Text.Parsec.Pos (updatePosChar) import Text.Parsec.Pos (updatePosChar)
type Located a = (SourcePos, a)
type LocatedToken = (SourcePos, Token) type LocatedToken = (SourcePos, Token)
type Lexer = Parser [LocatedToken] type Lexer = Parser [LocatedToken]
data Level data Level
= One = One
| Two | Two
| Three | Three
| Four | Four
| Five | Five
| Six | Six
deriving (Eq, Show) deriving (Eq, Show)
data Token data Token
= Token Text = Token Text
| Anchor Text | Anchor Text
| BirdTrack | BirdTrack
| BoldOpen | BoldOpen
| BoldClose | BoldClose
| Escape | Escape
| EmphasisOpen | EmphasisOpen
| EmphasisClose | EmphasisClose
| Header Level | Header Level
| MonospaceOpen | MonospaceOpen
| MonospaceClose | MonospaceClose
| Newline | Newline
| LinkOpen | LinkOpen
| LinkClose | LinkClose
| LabeledLinkOpen | LabeledLinkOpen
| LabeledLinkClose | LabeledLinkClose
| ParenOpen | ParenOpen
| ParenClose | ParenClose
| BracketOpen | BracketOpen
| BracketClose | BracketClose
| MathInlineOpen | MathsParenOpen
| MathInlineClose | MathsParenClose
| MathMultilineOpen | MathsBracketOpen
| MathMultilineClose | MathsBracketClose
| NumericEntity Int | NumericEntity Int
| Module Text | Module
| QuoteOpen | QuoteOpen
| QuoteClose | QuoteClose
| Space | Space
| EOF | EOF
deriving (Eq, Show) deriving (Eq, Show)
located :: Parser a -> Parser (SourcePos, a) located :: Parser a -> Parser (SourcePos, a)
located p = (,) <$> getPosition <*> p located p = (,) <$> getPosition <*> p
startPosition :: Parser a -> Parser SourcePos
startPosition = fmap fst . located
tokenise :: [Parser a] -> Parser [(SourcePos, a)] tokenise :: [Parser a] -> Parser [(SourcePos, a)]
tokenise = mapM located tokenise = sequence . map located
lexer :: String -> Either ParseError [LocatedToken] lexer :: String -> Either ParseError [LocatedToken]
lexer = Parsec.runParser lexText initialParserState "input" . Text.pack lexer = Parsec.runParser lexText initialParserState "input" . Text.pack
lexText :: Parser [LocatedToken] lexText :: Parser [LocatedToken]
lexText = go lexText = go
where where
go = do go = do
Parsec.optionMaybe Parsec.eof >>= \case Parsec.optionMaybe Parsec.eof >>= \case
Just _ -> pure [] Just _ -> pure []
Nothing -> do Nothing -> do
toks <- toks <-
choice $ choice $
Parsec.try Parsec.try
<$> [ mathMultiline <$> [ mathsBracket
, mathInline , mathsParens
, escape -- maths go before escape to avoid mismatch , escape -- maths go before escape to avoid mismatch
, headers , headers
, newlineToken , newlineToken
, spaceToken , spaceToken
, link , link
, labeledLink , labeledLink
, module_ , modules
, anchor , anchors
, numericEntity , textElement
, textElement , quotes
, quotes , birdTrack
, birdTrack , other
, other ]
] rest <- go
rest <- go pure (toks <> rest)
pure (toks <> rest)
-- Tokens -- Tokens
textElement :: Parser [LocatedToken] textElement :: Parser [LocatedToken]
textElement = textElement =
choice $ choice $
Parsec.try Parsec.try
<$> [ emphasis <$> [ emphasis
, bold , bold
, monospace , monospace
] ]
headers :: Parser [LocatedToken] headers :: Parser [LocatedToken]
headers = headers =
choice $ choice $
Parsec.try Parsec.try
<$> [ header1 <$> [ header1
, header2 , header2
, header3 , header3
, header4 , header4
, header5 , header5
, header6 , header6
] ]
delimitedMaybe :: Parser a -> Parser a -> Token -> Maybe Token -> Parser [LocatedToken]
delimitedMaybe openMark closeMark openToken closeToken = do
openPos <- getPosition
void openMark
tokenPos <- getPosition
content <- anyUntil closeMark
closePos <- getPosition
void closeMark
let openTok :: LocatedToken = (openPos, openToken)
res :: LocatedToken = (tokenPos, Token content)
closeToks :: [LocatedToken] = case closeToken of
Just close -> [(closePos, close)]
Nothing -> []
pure $ [openTok, res] <> closeToks
anyUntil :: Parser a -> Parser Text anyUntil :: Parser a -> Parser Text
anyUntil p = Text.pack <$> manyTill anyChar (lookAhead p) anyUntil p = Text.pack <$> manyTill anyChar (lookAhead p)
delimitedAsTuple :: Parser open -> Parser close -> Parser (Located open, LocatedToken, Located close) delimited :: Parser a -> Parser a -> Token -> Token -> Parser [LocatedToken]
delimitedAsTuple openP closeP = delimited a b c d = delimitedMaybe a b c (Just d)
(,,)
<$> located openP
<*> located (Token <$> anyUntil closeP)
<*> located closeP
delimited :: Parser open -> Parser close -> Token -> Token -> Parser [LocatedToken]
delimited openP closeP openTok closeTok = asList <$> delimitedAsTuple (openTok <$ openP) (closeTok <$ closeP)
where
asList (a, tok, b) = [a, tok, b]
delimitedNoTrailing :: Parser open -> Parser close -> Token -> Parser [LocatedToken]
delimitedNoTrailing openP closeP openTok = asList <$> delimitedAsTuple (openTok <$ openP) (void closeP)
where
asList (a, tok, _) = [a, tok]
delimitedSymmetric :: Parser a -> Token -> Token -> Parser [LocatedToken] delimitedSymmetric :: Parser a -> Token -> Token -> Parser [LocatedToken]
delimitedSymmetric s = delimited s s delimitedSymmetric s t1 t2 = delimited s s t1 t2
eol :: Parser () eol :: Parser ()
eol = void "\n" <|> void "\r\n" <|> Parsec.eof eol = void "\n" <|> Parsec.eof
header1 :: Lexer header1 :: Lexer
header1 = delimitedNoTrailing "= " eol (Header One) header1 = delimitedMaybe (void $ "= ") eol (Header One) Nothing
header2 :: Lexer header2 :: Lexer
header2 = delimitedNoTrailing "== " eol (Header Two) header2 = delimitedMaybe (void $ "== ") eol (Header Two) Nothing
header3 :: Lexer header3 :: Lexer
header3 = delimitedNoTrailing "=== " eol (Header Three) header3 = delimitedMaybe (void $ "=== ") eol (Header Three) Nothing
header4 :: Lexer header4 :: Lexer
header4 = delimitedNoTrailing "==== " eol (Header Four) header4 = delimitedMaybe (void $ "==== ") eol (Header Four) Nothing
header5 :: Lexer header5 :: Lexer
header5 = delimitedNoTrailing "===== " eol (Header Five) header5 = delimitedMaybe (void $ "===== ") eol (Header Five) Nothing
header6 :: Lexer header6 :: Lexer
header6 = delimitedNoTrailing "====== " eol (Header Six) header6 = delimitedMaybe (void $ "====== ") eol (Header Six) Nothing
-- #anchors# -- #anchors#
anchor :: Lexer anchors :: Lexer
anchor = do anchors = do
x <- located $ between "#" "#" (Anchor <$> anyUntil "#") pos <- getPosition
pure [x] void $ try anchor'
txt <- anyUntil anchor'
void $ try anchor'
moduleNames :: Parser Text pure [(pos, Anchor txt)]
moduleNames = intercalate "." . fmap Text.pack <$> upperId `sepBy1` char '.' where
anchor' = (string "#" <|> string "\\#")
upperId :: Parser String
upperId = (:) <$> satisfy isUpper <*> many1 identifierChar
identifierChar :: Parser Char
identifierChar = satisfy (\c -> isAlphaNum c || c == '_')
-- "Module.Name" -- "Module.Name"
-- "Module.Name#anchor" -- "Module.Name#anchor"
-- "Module.Name\#anchor" -- known as "old anchor". this has been deprecated for 9 years, thanks Ben -- "Module.Name\#anchor" -- this has been deprecated for 9 years, thanks Ben
module_ :: Lexer modules :: Lexer
module_ = between (char '"') (char '"') inner modules = do
where startPos <- startPosition $ char '"'
inner = do (modPos, modName) <- located modId
m <- located $ Module <$> moduleNames anch <- option [] do
mAnchor <- optionMaybe (located $ anchorHash *> (Anchor <$> anchorText)) anchPos <- startPosition (string "#" <|> string' "\\#")
pure $ case mAnchor of txt <- Text.pack <$> many (satisfy (\c -> c /= '"' && not (isSpace c)))
Just anc -> [m, anc] pure [(anchPos, Anchor txt)]
Nothing -> [m]
anchorHash :: Parser Text void $ char '"'
anchorHash = "#" <|> try "\\#" pure $ [(startPos, Module), (modPos, Token modName)] <> anch
where
modId = intercalate "." <$> (fmap Text.pack <$> (conId `sepBy1` (char '.')))
anchorText :: Parser Text conId :: Parser String
anchorText = Text.pack <$> many (satisfy (\c -> c /= '"' && not (isSpace c))) conId =
(:)
<$> satisfy (\c -> isAlpha c && isUpper c)
<*> many1 conChar
conChar :: Parser Char
conChar = satisfy (\c -> isAlphaNum c || c == '_')
linkRaw :: Lexer linkRaw :: Lexer
linkRaw = linkRaw =
tokenise tokenise
[ BracketOpen <$ "[" [ BracketOpen <$ "["
, Token <$> anyUntil "]" , Token <$> anyUntil "]"
, BracketClose <$ "]" , BracketClose <$ "]"
, ParenOpen <$ "(" , ParenOpen <$ "("
, Token <$> anyUntil ")" , Token <$> anyUntil ")"
, ParenClose <$ ")" , ParenClose <$ ")"
] ]
link :: Lexer link :: Lexer
link = do link = do
pos <- getPosition pos <- getPosition
l <- linkRaw l <- linkRaw
-- register the position of the last token -- "unconsume" the last token
pos' <- flip incSourceColumn (-1) <$> getPosition pos' <- flip incSourceColumn (-1) <$> getPosition
pure $ (pos, LinkOpen) : l <> [(pos', LinkClose)] pure $ (pos, LinkOpen) : l <> [(pos', LinkClose)]
labeledLink :: Lexer labeledLink :: Lexer
labeledLink = do labeledLink = do
open <- located $ LabeledLinkOpen <$ "<" pos <- getPosition
linkRes <- linkRaw void $ string "<"
labelRes <- located $ Token <$> anyUntil ">" link' <- linkRaw
close <- located $ LabeledLinkClose <$ ">" pos7 <- getPosition
pure $ label' <- anyUntil $ string ">"
open : linkRes <> [labelRes, close] pos8 <- getPosition
void $ ">"
mathMultiline :: Lexer pure $
mathMultiline = delimited "\\[" "\\]" MathMultilineOpen MathMultilineClose (pos, LabeledLinkOpen)
: link'
<> [ (pos7, Token label')
, (pos8, LabeledLinkClose)
]
mathInline :: Lexer mathsBracket :: Lexer
mathInline = delimited "\\(" "\\)" MathInlineOpen MathInlineClose mathsBracket = delimited (void $ "\\[") (void "\\]") MathsBracketOpen MathsBracketClose
mathsParens :: Lexer
mathsParens = delimited (void $ "\\(") (void "\\)") MathsParenOpen MathsParenClose
birdTrack :: Lexer birdTrack :: Lexer
birdTrack = delimitedNoTrailing ">> " eol BirdTrack birdTrack = delimitedMaybe (void ">> ") eol BirdTrack Nothing
escape :: Lexer escape :: Lexer
escape = delimitedNoTrailing "\\" eol Escape escape = delimitedMaybe (void "\\") eol Escape Nothing
quotes :: Lexer quotes :: Lexer
quotes = delimitedSymmetric "\"" QuoteOpen QuoteClose quotes = delimitedSymmetric "\"" QuoteOpen QuoteClose
@ -257,49 +270,25 @@ bold = delimitedSymmetric "__" BoldOpen BoldClose
monospace :: Lexer monospace :: Lexer
monospace = delimitedSymmetric "@" MonospaceOpen MonospaceClose monospace = delimitedSymmetric "@" MonospaceOpen MonospaceClose
decimal :: Parser Int
decimal = read . Text.unpack <$> takeWhile1_ isDigit
hexadecimal :: Parser Int
hexadecimal = "x" *> (convert 0 . fmap (normalise . toLower) <$> many1 hexDigit)
where
normalise :: Char -> Int
normalise c
| ord '0' <= n && n <= ord '9' = n - ord '0'
| ord 'A' <= n && n <= ord 'F' = n - ord 'A' + 10
| ord 'a' <= n && n <= ord 'f' = n - ord 'a' + 10
| otherwise = error "unexpected: invalid hex number"
where
n = ord c
convert :: Int -> [Int] -> Int
convert acc [] = acc
convert acc (x : xs) = convert (acc * 16 + x) xs
numericEntity :: Lexer
numericEntity = do
x <- located $ between "&#" ";" (NumericEntity <$> (hexadecimal <|> decimal))
pure [x]
other :: Lexer other :: Lexer
other = do other = do
pos <- getPosition pos <- getPosition
c <- takeWhile1_ isUnicodeAlphaNum c <- takeWhile1_ isUnicodeAlphaNum
pure . pure $ (pos, Token c) pure . pure $ (pos, Token c)
where where
isUnicodeAlphaNum c = isPrint c && not (isControl c) && not (isSpace c) isUnicodeAlphaNum c = isPrint c && not (isControl c) && not (isSpace c)
spaceToken :: Lexer spaceToken :: Lexer
spaceToken = do spaceToken = do
pos <- getPosition pos <- getPosition
_ <- many1 (char ' ') _ <- many1 (char ' ')
pure . pure $ (pos, Space) pure . pure $ (pos, Space)
newlineToken :: Lexer newlineToken :: Lexer
newlineToken = do newlineToken = do
pos <- getPosition pos <- getPosition
_ <- newline _ <- newline
pure . pure $ (pos, Newline) pure . pure $ (pos, Newline)
------- -------
-- Helpers -- Helpers
@ -308,11 +297,11 @@ newlineToken = do
-- | Like `takeWhile`, but unconditionally take escaped characters. -- | Like `takeWhile`, but unconditionally take escaped characters.
takeWhile_ :: (Char -> Bool) -> Parser Text takeWhile_ :: (Char -> Bool) -> Parser Text
takeWhile_ p = scan p_ False takeWhile_ p = scan p_ False
where where
p_ escaped c p_ escaped c
| escaped = Just False | escaped = Just False
| not $ p c = Nothing | not $ p c = Nothing
| otherwise = Just (c == '\\') | otherwise = Just (c == '\\')
-- | Like 'takeWhile1', but unconditionally take escaped characters. -- | Like 'takeWhile1', but unconditionally take escaped characters.
takeWhile1_ :: (Char -> Bool) -> Parser Text takeWhile1_ :: (Char -> Bool) -> Parser Text
@ -322,20 +311,19 @@ takeWhile1_ = mfilter (not . Text.null) . takeWhile_
function returns true. function returns true.
-} -}
scan :: scan ::
-- | scan function -- | scan function
(state -> Char -> Maybe state) -> (state -> Char -> Maybe state) ->
-- | initial state -- | initial state
state -> state ->
Parser Text Parser Text
scan f initState = do scan f initState = do
parserState@State{stateInput = input, statePos = pos} <- Parsec.getParserState parserState@State{stateInput = input, statePos = pos} <- Parsec.getParserState
(remaining, finalPos, ct) <- go input initState pos 0
(remaining, finalPos, ct) <- go input initState pos 0 let newState = parserState{stateInput = remaining, statePos = finalPos}
let newState = parserState{stateInput = remaining, statePos = finalPos} Parsec.setParserState newState $> Text.take ct input
Parsec.setParserState newState $> Text.take ct input where
where go !input' !st !posAccum !count' = case Text.uncons input' of
go !input' !st !posAccum !count' = case Text.uncons input' of Nothing -> pure (input', posAccum, count')
Nothing -> pure (input', posAccum, count') Just (char', input'') -> case f st char' of
Just (char', input'') -> case f st char' of Nothing -> pure (input', posAccum, count')
Nothing -> pure (input', posAccum, count') Just st' -> go input'' st' (updatePosChar posAccum char') (count' + 1)
Just st' -> go input'' st' (updatePosChar posAccum char') (count' + 1)

View file

@ -13,9 +13,7 @@ import Text.Parsec.Pos (updatePosChar)
Return everything consumed except for the end pattern itself. Return everything consumed except for the end pattern itself.
-} -}
takeUntil :: Text -> Parser Text takeUntil :: Text -> Parser Text
takeUntil end_ = takeUntil end_ = Text.dropEnd (Text.length end_) <$> requireEnd (scan p (False, end)) >>= gotSome
requireEnd (scan p (False, end))
>>= gotSome . Text.dropEnd (Text.length end_)
where where
end = Text.unpack end_ end = Text.unpack end_

View file

@ -9,8 +9,6 @@ module Types (
) )
where where
import Data.Foldable (fold)
newtype Document = Document newtype Document = Document
{ meta :: Meta { meta :: Meta
} }
@ -30,7 +28,6 @@ data Since = Since
-- Could have a better type? -- Could have a better type?
type Version = [Int] type Version = [Int]
type Package = String type Package = String
data DocMarkup mod id data DocMarkup mod id
@ -54,78 +51,78 @@ data DocMarkup mod id
| -- | Bold __bold text__ | -- | Bold __bold text__
DocBold (DocMarkup mod id) DocBold (DocMarkup mod id)
| {- | Unordered lists | {- | Unordered lists
* this * this
or or
- this - this
-} -}
DocUnorderedList [DocMarkup mod id] DocUnorderedList [DocMarkup mod id]
| {- | Ordered lists | {- | Ordered lists
1. this 1. this
or or
(1) this (1) this
-} -}
DocOrderedList [(Int, DocMarkup mod id)] DocOrderedList [(Int, DocMarkup mod id)]
| {- | Definition lists | {- | Definition lists
[term] a term [term] a term
[another term] another definition [another term] another definition
-} -}
DocDefinitionList [(DocMarkup mod id, DocMarkup mod id)] DocDefinitionList [(DocMarkup mod id, DocMarkup mod id)]
| {- | Code blocks | {- | Code blocks
@ @
a code block in here a code block in here
with multiple lines with multiple lines
@ @
Or with bird tracks: Or with bird tracks:
> some code > some code
> goes here > goes here
-} -}
DocCodeBlock (DocMarkup mod id) DocCodeBlock (DocMarkup mod id)
| {- | Hyperlinks | {- | Hyperlinks
__marked__: __marked__:
<http://example.com> <http://example.com>
<http://example.com label text> <http://example.com label text>
__Auto-detected URLs__: __Auto-detected URLs__:
http://example.com http://example.com
https://example.com https://example.com
ftp://example.com ftp://example.com
__Markdown style__ __Markdown style__
[link text](http://example.com) [link text](http://example.com)
[link text]("Module.Name") [link text]("Module.Name")
-} -}
DocHyperlink (Hyperlink (DocMarkup mod id)) DocHyperlink (Hyperlink (DocMarkup mod id))
| {- | Pictures | {- | Pictures
<<image.png>> <<image.png>>
<<image.png title text>> <<image.png title text>>
__Markdown Images__ __Markdown Images__
![alt text](image.png) ![alt text](image.png)
-} -}
DocPicture Picture DocPicture Picture
| {- | Inline math expressions | {- | Inline math expressions
\(mathematical expression\) \(mathematical expression\)
-} -}
DocMathInline String DocMathInline String
| {- | Math multiline display | {- | Math multiline display
\[ \[
mathematical expression mathematical expression
in multiple lines in multiple lines
\] \]
-} -}
DocMathDisplay String DocMathDisplay String
| {- | Anchors, no spaces allowed | {- | Anchors, no spaces allowed
#anchor-name# #anchor-name#
-} -}
DocAnchor String DocAnchor String
| {- | Property descriptions | {- | Property descriptions
prop> property description prop> property description
-} -}
DocProperty String DocProperty String
| {- | Examples | {- | Examples
>>> expression >>> expression
result line 1 result line 1
result line 2 result line 2
-} -}
DocExamples [Example] DocExamples [Example]
| -- | Header | -- | Header
@ -139,7 +136,7 @@ instance Semigroup (DocMarkup mod id) where
instance Monoid (DocMarkup mod id) where instance Monoid (DocMarkup mod id) where
mempty = DocEmpty mempty = DocEmpty
mconcat = fold mconcat = foldr (<>) mempty
data ModuleLink id = ModuleLink data ModuleLink id = ModuleLink
{ name :: String { name :: String

View file

@ -1,42 +1,42 @@
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
{-# OPTIONS_GHC -Wno-orphans #-} {-# OPTIONS_GHC -Wno-orphans #-}
import Data.String (IsString (..)) import Test.Hspec
import Data.Text (Text)
import GHC.Stack
import Identifier (Identifier) import Identifier (Identifier)
import Lexer import Lexer
import Parser import Parser
import Types import Types
import Test.Hspec import Data.String (IsString (..))
import Data.Text (Text)
import Text.Parsec.Pos import Text.Parsec.Pos
import GHC.Stack
main :: IO () main :: IO ()
main = hspec $ do main = hspec $ do
describe "Lexer" do describe "Lexer" do
describe "minimal" do describe "minimal" do
it "handles unicode" unicode it "handles unicode" unicode
it "escapes" escaping it "escapes" escaping
it "maths" math it "maths" maths
it "anchors" anchor it "anchors" anchors
it "space chars" space it "space chars" space
it "bare string" someString it "bare string" someString
it "emphasis" emphatic it "emphasis" emphatic
it "monospace" monospace it "monospace" monospace
it "labeled link" labeledLink it "labeled link" labeledLink
it "markdown link" link it "markdown link" link
it "bird tracks" birdTracks it "bird tracks" birdTracks
it "module names" modules it "module names" modules
it "quotes" quotes it "quotes" quotes
it "numeric entity" numericEntity it "ignores nesting" ignoreNesting
it "ignores nesting" ignoreNesting
describe "Parser" do describe "Parser" do
it "Bold" do it "Bold" do
"__bold__" `shouldParseTo` DocBold (DocString "bold") "__bold__" `shouldParseTo` (DocBold (DocString "bold"))
it "Emphasis" do it "Emphasis" do
"/emphasis/" `shouldParseTo` DocEmphasis (DocString "emphasis") "/emphasis/" `shouldParseTo` (DocEmphasis (DocString "emphasis"))
------------ ------------
-- Tests -- Tests
@ -44,150 +44,136 @@ main = hspec $ do
modules :: Expectation modules :: Expectation
modules = do modules = do
"\"MyModule.Name\"" "\"MyModule.Name\""
`shouldLexTo` [ (1, 2, Module "MyModule.Name") `shouldLexTo` [ (1, 1, Module)
] , (1, 2, Token "MyModule.Name")
]
"\"OtherModule.Name#myAnchor\"" "\"OtherModule.Name#myAnchor\""
`shouldLexTo` [ (1, 2, Module "OtherModule.Name") `shouldLexTo` [ (1, 1, Module)
, (1, 18, Anchor "myAnchor") , (1, 2, Token "OtherModule.Name")
] , (1, 18, Anchor "myAnchor")
]
"\"OtherModule.Name\\#myAnchor\""
`shouldLexTo` [ (1, 2, Module "OtherModule.Name")
, (1, 18, Anchor "myAnchor")
]
"\"OtherModule.Name\\#myAnchor\""
`shouldLexTo` [ (1, 1, Module)
, (1, 2, Token "OtherModule.Name")
, (1, 18, Anchor "myAnchor")
]
link :: Expectation link :: Expectation
link = link =
"[link to](http://some.website)" "[link to](http://some.website)"
`shouldLexTo` [ (1, 1, LinkOpen) `shouldLexTo` [ (1, 1, LinkOpen)
, (1, 1, BracketOpen) , (1, 1, BracketOpen)
, (1, 2, Token "link to") , (1, 2, Token "link to")
, (1, 9, BracketClose) , (1, 9, BracketClose)
, (1, 10, ParenOpen) , (1, 10, ParenOpen)
, (1, 11, Token "http://some.website") , (1, 11, Token "http://some.website")
, (1, 30, ParenClose) , (1, 30, ParenClose)
, (1, 30, LinkClose) , (1, 30, LinkClose)
] ]
labeledLink :: Expectation labeledLink :: Expectation
labeledLink = labeledLink =
"<[link here](http://to.here) label>" "<[link here](http://to.here) label>"
`shouldLexTo` [ (1, 1, LabeledLinkOpen) `shouldLexTo` [ (1, 1, LabeledLinkOpen)
, (1, 2, BracketOpen) , (1, 2, BracketOpen)
, (1, 3, Token "link here") , (1, 3, Token "link here")
, (1, 12, BracketClose) , (1, 12, BracketClose)
, (1, 13, ParenOpen) , (1, 13, ParenOpen)
, (1, 14, Token "http://to.here") , (1, 14, Token "http://to.here")
, (1, 28, ParenClose) , (1, 28, ParenClose)
, (1, 29, Token " label") , (1, 29, Token " label")
, (1, 35, LabeledLinkClose) , (1, 35, LabeledLinkClose)
] ]
anchor :: Expectation anchors :: Expectation
anchor = anchors =
"#myAnchor#" "#myAnchor#"
`shouldLexTo` [ (1, 1, Anchor "myAnchor") `shouldLexTo` [ (1, 1, Anchor "myAnchor")
] ]
math :: IO () maths :: IO ()
math = do maths = do
"\\[some math\\]" "\\[some math\\]"
`shouldLexTo` [ (1, 1, MathMultilineOpen) `shouldLexTo` [ (1, 1, MathsBracketOpen)
, (1, 3, Token "some math") , (1, 3, Token "some math")
, (1, 12, MathMultilineClose) , (1, 12, MathsBracketClose)
] ]
"\\(other maths\\)" "\\(other maths\\)"
`shouldLexTo` [ (1, 1, MathInlineOpen) `shouldLexTo` [ (1, 1, MathsParenOpen)
, (1, 3, Token "other maths") , (1, 3, Token "other maths")
, (1, 14, MathInlineClose) , (1, 14, MathsParenClose)
] ]
escaping :: Expectation escaping :: Expectation
escaping = do escaping =
"\\(" "\\("
`shouldLexTo` [ (1, 1, Escape) `shouldLexTo` [ (1, 1, Escape)
, (1, 2, Token "(") , (1, 2, Token "(")
] ]
"\\(\r\n"
`shouldLexTo` [ (1, 1, Escape)
, (1, 2, Token "(")
]
unicode :: Expectation unicode :: Expectation
unicode = unicode =
"ドラゴンクエストの冒険者🐉" "ドラゴンクエストの冒険者🐉"
`shouldLexTo` [ (1, 1, Token "ドラゴンクエストの冒険者🐉") `shouldLexTo` [ (1, 1, Token "ドラゴンクエストの冒険者🐉")
] ]
ignoreNesting :: Expectation ignoreNesting :: Expectation
ignoreNesting = ignoreNesting =
">/foo/" ">/foo/"
`shouldLexTo` [ (1, 1, Token ">/foo/") `shouldLexTo` [ (1, 1, Token ">/foo/")
] ]
birdTracks :: Expectation birdTracks :: Expectation
birdTracks = birdTracks =
">> code" ">> code"
`shouldLexTo` [ (1, 1, BirdTrack) `shouldLexTo` [ (1, 1, BirdTrack)
, (1, 4, Token "code") , (1, 4, Token "code")
] ]
quotes :: Expectation quotes :: Expectation
quotes = quotes =
"\"quoted\"" "\"quoted\""
`shouldLexTo` [ (1, 1, QuoteOpen) `shouldLexTo` [ (1, 1, QuoteOpen)
, (1, 2, Token "quoted") , (1, 2, Token "quoted")
, (1, 8, QuoteClose) , (1, 8, QuoteClose)
] ]
space :: Expectation space :: Expectation
space = do space = do
"\n " "\n "
`shouldLexTo` [ (1, 1, Newline) `shouldLexTo` [ (1, 1, Newline)
, (2, 1, Space) , (2, 1, Space)
] ]
" \n" " \n"
`shouldLexTo` [ (1, 1, Space) `shouldLexTo` [ (1, 1, Space)
, (1, 2, Newline) , (1, 2, Newline)
] ]
numericEntity :: Expectation
numericEntity = do
"&#65; &#955;"
`shouldLexTo` [ (1, 1, NumericEntity 65)
, (1, 6, Space)
, (1, 7, NumericEntity 955) -- lambda
]
-- Hex
"&#x65;"
`shouldLexTo` [ (1, 1, NumericEntity 101)
]
monospace :: Expectation monospace :: Expectation
monospace = monospace =
"@mono@" "@mono@"
`shouldLexTo` [ (1, 1, MonospaceOpen) `shouldLexTo` [ (1, 1, MonospaceOpen)
, (1, 2, Token "mono") , (1, 2, Token "mono")
, (1, 6, MonospaceClose) , (1, 6, MonospaceClose)
] ]
emphatic :: Expectation emphatic :: Expectation
emphatic = emphatic =
"/emphatic/" "/emphatic/"
`shouldLexTo` [ (1, 1, EmphasisOpen) `shouldLexTo` [ (1, 1, EmphasisOpen)
, (1, 2, Token "emphatic") , (1, 2, Token "emphatic")
, (1, 10, EmphasisClose) , (1, 10, EmphasisClose)
] ]
someString :: Expectation someString :: Expectation
someString = someString =
"some string" "some string"
`shouldLexTo` [ (1, 1, Token "some") `shouldLexTo` [ (1, 1, Token "some")
, (1, 5, Space) , (1, 5, Space)
, (1, 6, Token "string") , (1, 6, Token "string")
] ]
-------------- --------------
-- Helpers -- Helpers
@ -196,16 +182,16 @@ someString =
type Doc id = DocMarkup () id type Doc id = DocMarkup () id
instance IsString (Doc String) where instance IsString (Doc String) where
fromString = DocString fromString = DocString
shouldLexTo :: String -> [(Int, Int, Token)] -> Expectation shouldLexTo :: String -> [(Int, Int, Token)] -> Expectation
shouldLexTo input expected = shouldLexTo input expected =
withFrozenCallStack $ withFrozenCallStack $
case lexer input of case lexer input of
Right tokens -> do Right tokens -> do
let actual = map (\(pos, tok) -> (sourceLine pos, sourceColumn pos, tok)) tokens let actual = map (\(pos, tok) -> (sourceLine pos, sourceColumn pos, tok)) tokens
actual `shouldBe` expected actual `shouldBe` expected
Left err -> expectationFailure $ "Parse error: " <> show err Left err -> expectationFailure $ "Parse error: " <> show err
shouldParseTo :: Text -> DocMarkup mod Identifier -> Expectation shouldParseTo :: Text -> DocMarkup mod Identifier -> Expectation
shouldParseTo input ast = parseText input `shouldBe` ast shouldParseTo input ast = parseText input `shouldBe` ast