forked from elland/haddock2
Init
This commit is contained in: commit c9f61c4e06
16 changed files with 1366 additions and 0 deletions

1 .gitignore vendored Normal file
@@ -0,0 +1 @@
dist-newstyle

5 CHANGELOG.md Normal file
@@ -0,0 +1,5 @@
# Revision history for haddock2

## 0.1.0.0 -- YYYY-mm-dd

* First version. Released on an unsuspecting world.

88 Grammar.ebnf Normal file
@@ -0,0 +1,88 @@
document ::= paragraph*

paragraph ::= ( text_paragraph | code_block | header | list | table ) newline?

text_paragraph ::= text_element+

text_element ::= emphasis | bold | monospace | link | anchor | identifier |
                 inline_math | image | plain_text | escaped_char

emphasis ::= '/' text_no_newline '/'
bold ::= '__' text_no_newline '__'
monospace ::= '@' text_content '@'

link ::= module_link | hyperlink | markdown_link
module_link ::= '"' module_name ( '#' anchor_name )? '"'
hyperlink ::= '<' url ( ' ' link_text )? '>'
markdown_link ::= '[' link_text '](' ( url | module_link ) ')'

anchor ::= '#' anchor_name '#'
identifier ::= "'" haskell_id "'"

inline_math ::= '\(' math_expr '\)'
image ::= '<<' image_path ( ' ' image_title )? '>>' |
          '![' alt_text '](' image_path ')'

code_block ::= at_block | bird_tracks | example_block | property_block
at_block ::= '@' newline code_content newline '@'
bird_tracks ::= ( '>' ' '? code_line newline )+
example_block ::= ( '>>>' ' ' expression newline result_line* )+
property_block ::= 'prop>' ' ' property_desc newline

header ::= header_marker ' ' header_text newline
header_marker ::= '=' | '==' | '===' | '====' | '=====' | '======'

list ::= unordered_list | ordered_list | definition_list
unordered_list ::= ( list_marker ' ' list_content )+
list_marker ::= '*' | '-'
ordered_list ::= ( number_marker ' ' list_content )+
number_marker ::= digit+ '.' | '(' digit+ ')'
definition_list ::= ( '[' term ']' ':'? ' ' definition_content )+

table ::= table_border header_row header_sep data_row* table_border
table_border ::= '+' ( '-' | '+' )* newline
header_row ::= '|' ( table_cell '|' )* newline
header_sep ::= '+' ( '=' | '+' )* newline
data_row ::= '|' ( table_cell '|' )* newline

plain_text ::= text_char+
text_char ::= letter | digit | ' ' | punctuation
text_no_newline ::= ( letter | digit | ' ' | safe_punctuation )+
text_content ::= ( letter | digit | ' ' | newline | punctuation )*
code_content ::= code_char*
code_char ::= letter | digit | ' ' | newline | punctuation
code_line ::= ( letter | digit | ' ' | punctuation )*
list_content ::= text_element* ( newline ' ' text_element* )?
table_cell ::= ' ' table_char* ' '
table_char ::= letter | digit | ' ' | safe_punctuation
header_text ::= ( letter | digit | ' ' | punctuation )+
link_text ::= link_char+
link_char ::= letter | digit | ' ' | safe_punctuation
alt_text ::= alt_char+
alt_char ::= letter | digit | ' ' | safe_punctuation
image_path ::= ( letter | digit | '.' | '/' | '-' | '_' )+
image_title ::= image_title_char+
image_title_char ::= letter | digit | ' ' | safe_punctuation
anchor_name ::= letter ( letter | digit | '-' )*
haskell_id ::= letter ( letter | digit | '_' | "'" )*
module_name ::= haskell_id ( '.' haskell_id )*
url ::= ( 'http://' | 'https://' | 'ftp://' ) url_char+
url_char ::= letter | digit | '/' | '.' | ':' | '-' | '_' | '?' | '&' | '='
math_expr ::= math_char+
math_char ::= letter | digit | ' ' | math_punctuation
property_desc ::= ( letter | digit | ' ' | punctuation )+
expression ::= ( letter | digit | ' ' | punctuation )+
result_line ::= result_char* newline
result_char ::= letter | digit | ' ' | safe_punctuation
term ::= term_char+
term_char ::= letter | digit | ' ' | safe_punctuation
definition_content ::= text_element+
escaped_char ::= '\' special_char

special_char ::= '/' | '*' | '@' | "'" | '"' | '#' | '<' | '>' | '[' | ']' | '(' | ')' | '|' | '=' | '-' | '+'
safe_punctuation ::= '!' | '$' | '%' | '^' | '&' | '(' | ')' | '_' | '+' | '{' | '}' | ';' | ':' | ',' | '.' | '`' | '~'
math_punctuation ::= '+' | '-' | '*' | '/' | '^' | '_' | '=' | '(' | ')' | '{' | '}' | '[' | ']'
punctuation ::= '!' | '@' | '#' | '$' | '%' | '^' | '&' | '*' | '(' | ')' | '-' | '_' | '+' | '=' | '{' | '}' | '[' | ']' | '|' | '\' | ':' | ';' | '"' | "'" | '<' | '>' | '?' | ',' | '.' | '/' | '`' | '~'
letter ::= 'a' | 'b' | 'c' | 'd' | 'e' | 'f' | 'g' | 'h' | 'i' | 'j' | 'k' | 'l' | 'm' | 'n' | 'o' | 'p' | 'q' | 'r' | 's' | 't' | 'u' | 'v' | 'w' | 'x' | 'y' | 'z' | 'A' | 'B' | 'C' | 'D' | 'E' | 'F' | 'G' | 'H' | 'I' | 'J' | 'K' | 'L' | 'M' | 'N' | 'O' | 'P' | 'Q' | 'R' | 'S' | 'T' | 'U' | 'V' | 'W' | 'X' | 'Y' | 'Z'
digit ::= '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9'
newline ::= #xA | #xD #xA | #xD
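
The terminals above line up with the constructors defined later in src/Types.hs. As a hedged illustration (not part of this commit), the `header_marker` production could map onto the library's `HeaderLevel` type with the same parsec setup the other modules use; the name `headerMarker` is made up here, and it assumes `HeaderLevel` were added to the export list of Types (the initial commit does not export it).

```haskell
import ParserMonad (Parser)
import Text.Parsec qualified as Parsec
import Types (HeaderLevel)

-- Sketch only: header_marker ::= '=' | '==' | ... | '======'
headerMarker :: Parser HeaderLevel
headerMarker = do
    eqs <- Parsec.many1 (Parsec.char '=')
    if length eqs <= 6
        then pure (toEnum (length eqs - 1)) -- H1 .. H6 via the derived Enum instance
        else Parsec.parserFail "headers only go six levels deep"
```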

29 LICENSE Normal file
@@ -0,0 +1,29 @@
Copyright (c) 2025, Igor Ranieri


Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above
      copyright notice, this list of conditions and the following
      disclaimer in the documentation and/or other materials provided
      with the distribution.

    * Neither the name of the copyright holder nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

4 app/Main.hs Normal file
@@ -0,0 +1,4 @@
module Main where

main :: IO ()
main = putStrLn "Hello, Haskell!"

1 cabal.project.local Normal file
@@ -0,0 +1 @@
tests: True

55 haddock2.cabal Normal file
@@ -0,0 +1,55 @@
cabal-version: 3.4
name: haddock2
version: 0.1.0.0
license: BSD-3-Clause
license-file: LICENSE
author: Igor Ranieri
maintainer: igor@elland.me
build-type: Simple
extra-doc-files: CHANGELOG.md

common common
  ghc-options: -Wall
  default-extensions:
    BlockArguments
    DuplicateRecordFields
    NoFieldSelectors

executable haddock2
  import: common
  main-is: Main.hs
  build-depends: base >=4.20.1.0
  hs-source-dirs: app
  default-language: GHC2024

library haddock2-lib
  import: common
  build-depends:
    base >=4.20.1.0,
    parsec ^>=3.1.18.0,
    text ^>=2.1.2,

  hs-source-dirs: src
  -- cabal-gild: discover src
  exposed-modules:
    Identifier
    Lexer
    Parser
    Parser.Util
    ParserMonad
    Types

  default-language: GHC2024

test-suite haddock2-test
  import: common
  type: exitcode-stdio-1.0
  main-is: Spec.hs
  build-depends:
    base >=4.20.1.0,
    haddock2:{haddock2-lib},
    hspec ^>=2.11.0,
    text ^>=2.1.2,

  hs-source-dirs: test
  default-language: GHC2024

198 markup.md Normal file
@@ -0,0 +1,198 @@
# Haddock Markup Language Examples

## Text Formatting

### Emphasis
```
/emphasized text/
```
Single line only, no newlines allowed.

### Bold
```
__bold text__
```
Single line only, no newlines allowed.

### Monospace/Code
```
@monospace text@
```
Can span multiple lines.

## Links and References

### Module Links
```
"Module.Name"
"Module.Name#anchor"
"Module.Name\#anchor"
```

### Hyperlinks
```
<http://example.com>
<http://example.com label text>
```
Auto-detected URLs:
```
http://example.com
https://example.com
ftp://example.com
```

### Markdown-style Links
```
[link text](http://example.com)
[link text]("Module.Name")
```

### Anchors
```
#anchor-name#
```
No spaces allowed in anchor names.

### Identifiers
```
'identifier'
```
Links to Haskell identifiers.

## Images

### Basic Images
```
<<image.png>>
<<image.png title text>>
```

### Markdown Images
```

```

## Math

### Inline Math
```
\(mathematical expression\)
```
Single line only.

### Display Math
```
\[mathematical expression\]
```
Can span multiple lines.

## Code and Examples

### Code Blocks
```
@
code block content
with multiple lines
@
```

### Bird Tracks (Code)
```
> code line 1
> code line 2
```
Each line starts with `>` followed by optional space.

### Examples
```
>>> expression
result line 1
result line 2

>>> another expression
result
```

### Properties
```
prop> property description
```

## Lists

### Unordered Lists
```
* item 1
* item 2
    continued content

- item 1
- item 2
```

### Ordered Lists
```
1. item 1
2. item 2

(1) item 1
(2) item 2
```

### Definition Lists
```
[term] definition content
[another term] more definition content
```
Optional colon after closing bracket.

## Tables

### Grid Tables
```
+----------+----------+
| Header 1 | Header 2 |
+==========+==========+
| Cell 1   | Cell 2   |
+----------+----------+
| Cell 3   | Cell 4   |
+----------+----------+
```
- First row determines table width
- Header separator uses `=` characters
- Regular separators use `-`
- Edges can be `+` or `|`

## Headers
```
= Level 1 Header
== Level 2 Header
=== Level 3 Header
==== Level 4 Header
===== Level 5 Header
====== Level 6 Header
```
Up to 6 levels deep.

## Special Elements

### Since Annotations
```
@since package-name-1.2.3
@since 1.2.3
```

### Numeric Character References
```
A (decimal)
A (hexadecimal)
A (hexadecimal)
```

## Escaping
Use backslash `\` to escape special characters. Trailing backslash without following character is treated as literal backslash.

## Structure
- Paragraphs separated by blank lines
- 4-space indentation for nested content
- Whitespace handling varies by context
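
Several of the constructs documented above already round-trip through the parser added in this commit. A GHCi sketch (run from `cabal repl haddock2-lib`); the two expected results mirror the assertions in test/Spec.hs, everything else about the session is an assumption of mine, not something this commit pins down:

```haskell
-- GHCi session sketch
ghci> :set -XOverloadedStrings
ghci> import Parser (parseText)
ghci> import Types
ghci> import Identifier (Identifier)
ghci> parseText "__bold__" :: DocMarkup () Identifier
DocBold (DocString "bold")
ghci> parseText "/emphasis/" :: DocMarkup () Identifier
DocEmphasis (DocString "emphasis")
```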

157 src/Identifier.hs Normal file
@@ -0,0 +1,157 @@
module Identifier (
    Identifier (..),
    Namespace (..),
    parseValid,
) where

import Data.Text qualified as Text
import Text.Parsec qualified as Parsec

import Control.Monad (guard)
import Data.Char (isAlpha, isAlphaNum)
import Data.Functor (($>))
import Data.Maybe (listToMaybe, maybeToList)
import Data.Text (Text)
import ParserMonad
import Text.Parsec (State (..))
import Text.Parsec.Pos (updatePosChar)
import Text.Read.Lex (isSymbolChar)

-- | Identifier string surrounded with namespace, opening, and closing quotes/backticks.
data Identifier = Identifier !Namespace !Char String !Char
    deriving (Show, Eq)

-- | The namespace qualification for an identifier.
data Namespace = Value | Type | None deriving (Eq, Ord, Enum, Show)

parseValid :: Parser Identifier
parseValid = do
    state@State{stateInput = input, statePos = pos} <- Parsec.getParserState

    case takeIdentifier input of
        Nothing -> Parsec.parserFail "parseValid: Failed to match a valid identifier"
        Just (namespace, op, ident, cl, input') ->
            let posOp = updatePosChar pos op
                posIdent = Text.foldl updatePosChar posOp ident
                posCl = updatePosChar posIdent cl
                newState = state{stateInput = input', statePos = posCl}
             in Parsec.setParserState newState $> Identifier namespace op (Text.unpack ident) cl

{- | Try to parse a delimited identifier off the front of the given input.

This tries to match as many valid Haskell identifiers/operators as possible,
to the point of sometimes accepting invalid things (ex: keywords). Some
considerations:

- operators and identifiers can have module qualifications
- operators can be wrapped in parens (for prefix)
- identifiers can be wrapped in backticks (for infix)
- delimiters are backticks or regular ticks
- since regular ticks are also valid in identifiers, we opt for the
  longest successful parse

This function should make /O(1)/ allocations
-}
takeIdentifier :: Text -> Maybe (Namespace, Char, Text, Char, Text)
takeIdentifier input = listToMaybe $ do
    -- Optional namespace
    let (namespace, input') = case Text.uncons input of
            Just ('v', i) -> (Value, i)
            Just ('t', i) -> (Type, i)
            _ -> (None, input)
    -- Opening tick
    (op, input'') <- maybeToList (Text.uncons input')
    guard (op == '\'' || op == '`')

    -- Identifier/operator
    (ident, input''') <- wrapped input''

    -- Closing tick
    (cl, input'''') <- maybeToList (Text.uncons input''')
    guard (cl == '\'' || cl == '`')

    pure (namespace, op, ident, cl, input'''')
  where
    -- \| Parse out a wrapped, possibly qualified, operator or identifier
    wrapped t = do
        (c, t') <- maybeToList (Text.uncons t)
        -- Tuples
        case c of
            '('
                | Just (c', _) <- Text.uncons t'
                , c' == ',' || c' == ')' ->
                    do
                        let (commas, t'') = Text.span (== ',') t'
                        (')', t''') <- maybeToList (Text.uncons t'')
                        pure (Text.take (Text.length commas + 2) t, t''')

            -- Parenthesized
            '(' -> do
                (n, t'') <- general False 0 [] t'
                (')', t''') <- maybeToList (Text.uncons t'')
                pure (Text.take (n + 2) t, t''')

            -- Backticked
            '`' -> do
                (n, t'') <- general False 0 [] t'
                ('`', t''') <- maybeToList (Text.uncons t'')
                pure (Text.take (n + 2) t, t''')

            -- Unadorned
            _ -> do
                (n, t'') <- general False 0 [] t
                pure (Text.take n t, t'')

    -- \| Parse out a possibly qualified operator or identifier
    general ::
        Bool ->
        -- \^ refuse inputs starting with operators
        Int ->
        -- \^ total characters \"consumed\" so far
        [(Int, Text)] ->
        -- \^ accumulated results
        Text ->
        -- \^ current input
        [(Int, Text)]
        -- \^ total characters parsed & what remains
    general !identOnly !i acc t
        -- Starts with an identifier (either just an identifier, or a module qual)
        | Just (n, rest) <- identLike t =
            if Text.null rest
                then acc
                else case Text.head rest of
                    '`' -> (n + i, rest) : acc
                    ')' -> (n + i, rest) : acc
                    '.' -> general False (n + i + 1) acc (Text.tail rest)
                    '\'' ->
                        let (m, rest') = quotes rest
                         in general True (n + m + 1 + i) ((n + m + i, rest') : acc) (Text.tail rest')
                    _ -> acc
        -- An operator
        | Just (n, rest) <- optr t
        , not identOnly =
            (n + i, rest) : acc
        -- Anything else
        | otherwise =
            acc

    -- \| Parse an identifier off the front of the input
    identLike t
        | Text.null t = Nothing
        | isAlpha (Text.head t) || '_' == Text.head t =
            let !(idt, rest) = Text.span (\c -> isAlphaNum c || c == '_') t
                !(octos, rest') = Text.span (== '#') rest
             in Just (Text.length idt + Text.length octos, rest')
        | otherwise = Nothing

    -- \| Parse all but the last quote off the front of the input
    -- PRECONDITION: T.head t `elem` ['\'', '`']
    quotes :: Text -> (Int, Text)
    quotes t =
        let !n = Text.length (Text.takeWhile (`elem` ['\'', '`']) t) - 1
         in (n, Text.drop n t)

    -- \| Parse an operator off the front of the input
    optr t =
        let !(op, rest) = Text.span isSymbolChar t
         in if Text.null op then Nothing else Just (Text.length op, rest)
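
A quick sketch of driving `parseValid` directly (not part of the commit); the expected result is my reading of `takeIdentifier`, not something the test suite asserts:

```haskell
import Data.Text qualified as Text
import Identifier (Identifier (..), Namespace (..), parseValid)
import ParserMonad (initialParserState)
import Text.Parsec qualified as Parsec

-- Run the identifier parser on a tick-delimited name.
demo :: Either Parsec.ParseError Identifier
demo = Parsec.runParser parseValid initialParserState "<demo>" (Text.pack "'foldMap'")
-- expected: Right (Identifier None '\'' "foldMap" '\'')
```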

190 src/Lexer.hs Normal file
@@ -0,0 +1,190 @@
{-# LANGUAGE OverloadedStrings #-}

module Lexer (
    Token (..),
    lexer,
    emphasis,
) where

import Control.Monad (mfilter)
import Data.Char (isAlphaNum, isPrint)
import Data.Functor (($>))
import Data.Text (Text)
import Data.Text qualified as Text
import GHC.Stack (HasCallStack)
import ParserMonad (Parser, initialParserState)
import Text.Parsec
import Text.Parsec qualified as Parsec
import Text.Parsec.Pos (updatePosChar)

type LocatedToken = (SourcePos, Token)

type Lexer = Parser [LocatedToken]

data Token
    = Token Text
    | Anchor
    | AngleOpen
    | AngleClose
    | BoldOpen
    | BoldClose
    | BracketOpen
    | BracketClose
    | EmphasisOpen
    | EmphasisClose
    | MonospaceOpen
    | MonospaceClose
    | Newline
    | ParenOpen
    | ParenClose
    | QuoteOpen
    | QuoteClose
    | Space
    | EOF
    deriving (Eq, Show)

lexer :: String -> Either ParseError [LocatedToken]
lexer = Parsec.runParser lexText initialParserState "input" . Text.pack

lexText :: (HasCallStack) => Parser [LocatedToken]
lexText = go
  where
    go = do
        Parsec.optionMaybe Parsec.eof >>= \case
            Just _ -> pure []
            Nothing -> do
                toks <-
                    choice
                        [ newlineToken
                        , spaceToken
                        , textElement
                        , identifier
                        , other
                        ]
                rest <- go
                pure (toks <> rest)

match :: Parser a -> Parser (Text, a)
match p = do
    input <- getInput
    result <- p
    input' <- getInput
    let !consumed = Text.take (Text.length input - Text.length input') input
    pure (consumed, result)

-- Tokens

textElement :: Parser [LocatedToken]
textElement =
    choice $
        Parsec.try
            <$> [ emphasis
                , bold
                , monospace
                , parens
                , brackets
                , angles
                ]

delimited :: String -> String -> Token -> Token -> Parser [LocatedToken]
delimited c1 c2 ot ct = do
    pos <- getPosition
    (_, content) <- match $ between op cl any'

    innerToks <- case lexer $ Text.unpack content of
        Left _ -> do
            pos' <- getPosition
            pure $ [(pos', Token content)]
        Right toks -> pure toks

    let openTok :: LocatedToken = (pos, ot)
        closeTok :: LocatedToken = (pos, ct)

    pure $ openTok : innerToks <> [closeTok]
  where
    op = string c1
    cl = string c2
    any' = Text.pack <$> manyTill anyChar (lookAhead cl)

delimited' :: String -> Token -> Token -> Parser [LocatedToken]
delimited' s t1 t2 = delimited s s t1 t2

emphasis :: Lexer
emphasis = delimited' "/" EmphasisOpen EmphasisClose

bold :: Lexer
bold = delimited' "__" BoldOpen BoldClose

monospace :: Lexer
monospace = delimited' "@" MonospaceOpen MonospaceClose

parens :: Parser [LocatedToken]
parens = delimited "(" ")" ParenOpen ParenClose

brackets :: Lexer
brackets = delimited "[" "]" BracketOpen BracketClose

angles :: Parser [LocatedToken]
angles = delimited "<" ">" AngleOpen AngleClose

other :: Lexer
other = do
    pos <- getPosition
    c <- takeWhile1_ isPrint
    pure . pure $ (pos, Token c)

spaceToken :: Lexer
spaceToken = do
    pos <- getPosition
    _ <- many1 (char ' ')
    pure . pure $ (pos, Space)

newlineToken :: Lexer
newlineToken = do
    pos <- getPosition
    _ <- newline
    pure . pure $ (pos, Newline)

identifier :: Lexer
identifier = do
    pos <- getPosition
    txt <- takeWhile1_ isAlphaNum
    pure . pure $ (pos, Token txt)

-------
-- Helpers
-------

-- | Like `takeWhile`, but unconditionally take escaped characters.
takeWhile_ :: (Char -> Bool) -> Parser Text
takeWhile_ p = scan p_ False
  where
    p_ escaped c
        | escaped = Just False
        | not $ p c = Nothing
        | otherwise = Just (c == '\\')

-- | Like 'takeWhile1', but unconditionally take escaped characters.
takeWhile1_ :: (Char -> Bool) -> Parser Text
takeWhile1_ = mfilter (not . Text.null) . takeWhile_

{- | Scan the input text, accumulating characters as long as the scanning
function returns true.
-}
scan ::
    -- | scan function
    (state -> Char -> Maybe state) ->
    -- | initial state
    state ->
    Parser Text
scan f initState = do
    parserState@State{stateInput = input, statePos = pos} <- Parsec.getParserState
    (remaining, finalPos, ct) <- go input initState pos 0
    let newState = parserState{stateInput = remaining, statePos = finalPos}
    Parsec.setParserState newState $> Text.take ct input
  where
    go !input' !st !posAccum !count' = case Text.uncons input' of
        Nothing -> pure (input', posAccum, count')
        Just (char', input'') -> case f st char' of
            Nothing -> pure (input', posAccum, count')
            Just st' -> go input'' st' (updatePosChar posAccum char') (count' + 1)
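
For a feel of what the lexer emits, a small sketch (not part of the commit); the token list is my expectation from reading the `choice` order in `lexText`, not a documented guarantee:

```haskell
import Lexer (Token (..), lexer)

demoTokens :: IO ()
demoTokens =
    -- lexer attaches a SourcePos to every token; drop it with `map snd` for readability.
    print (map snd <$> lexer "hello world")
-- expected: Right [Token "hello", Space, Token "world"]
```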

228 src/Parser.hs Normal file
@@ -0,0 +1,228 @@
{-# LANGUAGE OverloadedStrings #-}

module Parser (
    parse,
    parseText,
) where

import Identifier
import ParserMonad
import Types

import Control.Applicative
import Control.Monad
import Data.Char qualified as Char
import Data.Functor (($>))
import Data.List (intercalate)
import Data.Text (Text)
import Data.Text qualified as Text
import Parser.Util
import Text.Parsec qualified as Parsec

parse :: Parser a -> Text -> (ParserState, a)
parse parser = either err id . parse' (parser <* Parsec.eof)
  where
    err = error . ("Haddock.Parser.parse: " ++)

--------------------
-- Markup
--------------------

{- | Skips a single special character and treats it as a plain string.
This is done to skip over any special characters belonging to other
elements but which were not deemed meaningful at their positions.
-}
skipSpecialChar :: Parser (DocMarkup mod a)
skipSpecialChar = DocString . pure <$> Parsec.oneOf specialChar

{- | Plain, regular parser for text. Called as one of the last parsers
to ensure that we have already given a chance to more meaningful parsers
before capturing their characters.
-}
string' :: Parser (DocMarkup mod a)
string' =
    DocString
        -- After the first character, stop for @\(@ or @\[@ math starters. (The
        -- first character won't start a valid math string because this parser
        -- should follow math parsers. But this parser is expected to accept at
        -- least one character from all inputs that don't start with special
        -- characters, so the first character parser can't have the @"(["@
        -- restriction.)
        <$> ((:) <$> rawOrEscChar "" <*> many (rawOrEscChar "(["))
  where
    -- \| Parse a single logical character, either raw or escaped. Don't accept
    -- escaped characters from the argument string.
    rawOrEscChar :: [Char] -> Parser Char
    rawOrEscChar restrictedEscapes =
        Parsec.try $
            Parsec.noneOf specialChar >>= \case
                -- Handle backslashes:
                -- - Fail on forbidden escape characters.
                -- - Non-forbidden characters: simply unescape, e.g. parse "\b" as 'b',
                -- - Trailing backslash: treat it as a raw backslash, not an escape
                --   sequence. (This is the logic that this parser followed when this
                --   comment was written; it is not necessarily intentional but now I
                --   don't want to break anything relying on it.)
                '\\' -> Parsec.noneOf restrictedEscapes <|> Parsec.eof $> '\\'
                c -> pure c

{- | Emphasis parser.

>>> parseString "/Hello world/"
DocEmphasis (DocString "Hello world")
-}
emphasis :: Parser (DocMarkup mod Identifier)
emphasis =
    DocEmphasis . parseAll
        <$> disallowNewline ("/" *> takeWhile1_ (/= '/') <* "/")

{- | Bold parser.

>>> parseString "__Hello world__"
DocBold (DocString "Hello world")
-}
bold :: Parser (DocMarkup mod Identifier)
bold = DocBold . parseAll <$> disallowNewline ("__" *> takeUntil "__")

{- | Monospaced strings.

>>> parseString "@cruel@"
DocMonospaced (DocString "cruel")
-}
monospace :: Parser (DocMarkup mod Identifier)
monospace =
    DocMonospace . parseAll
        <$> ("@" *> takeWhile1_ (/= '@') <* "@")

{- | Text anchors to allow for jumping around the generated documentation.

>>> parseString "#Hello world#"
DocAName "Hello world"
-}
anchor :: Parser (DocMarkup mod a)
anchor =
    DocAnchor . Text.unpack
        <$> ("#" *> takeWhile1_ (\x -> x /= '#' && not (Char.isSpace x)) <* "#")

-- | Parses identifiers with help of 'parseValid'.
identifier :: Parser (DocMarkup mod Identifier)
identifier = DocIdentifier <$> parseValid

{- | Module names.

Note that we allow '#' and '\' to support anchors (old style anchors are of
the form "SomeModule\#anchor").
-}
moduleName :: Parser (DocMarkup mod a)
moduleName = DocModule . flip ModuleLink Nothing <$> ("\"" *> moduleNameString <* "\"")

-- | A module name, optionally with an anchor
moduleNameString :: Parser String
moduleNameString = moduleId `maybeFollowedBy` anchor_
  where
    moduleId = intercalate "." <$> conid `Parsec.sepBy1` "."
    anchor_ =
        (++)
            <$> (Parsec.string "#" <|> Parsec.string "\\#")
            <*> many (Parsec.satisfy (\c -> c /= '"' && not (Char.isSpace c)))

    maybeFollowedBy pre suf = (\x -> maybe x (x ++)) <$> pre <*> optional suf

    conid =
        (:)
            <$> Parsec.satisfy (\c -> Char.isAlpha c && Char.isUpper c)
            <*> many conChar

    conChar = Parsec.alphaNum <|> Parsec.char '_'

------------------------
-- Markup components
------------------------

{- | List of characters that we use to delimit any special markup.
Once we have checked for any of these and tried to parse the
relevant markup, we can assume they are used as regular text.
-}
specialChar :: [Char]
specialChar = "_/<@\"&'`#[ "

------------------------
-- Helpers
------------------------

parse' :: Parser a -> Text -> Either String (ParserState, a)
parse' parser t =
    let parser' = (,) <$> parser <*> Parsec.getState
     in case Parsec.runParser parser' initialParserState "<haddock>" t of
            Left e -> Left (show e)
            Right (x, s) -> Right (s, x)

docConcat :: [DocMarkup mod id] -> DocMarkup mod id
docConcat = foldr docAppend DocEmpty
  where
    -- Prevent doc append from becoming too nested
    docAppend (DocDefinitionList ds1) (DocDefinitionList ds2) = DocDefinitionList (ds1 <> ds2)
    docAppend (DocDefinitionList ds1) (DocAppend (DocDefinitionList ds2) d) = DocAppend (DocDefinitionList (ds1 <> ds2)) d
    docAppend (DocOrderedList ds1) (DocOrderedList ds2) = DocOrderedList (ds1 <> ds2)
    docAppend (DocOrderedList ds1) (DocAppend (DocOrderedList ds2) d) = DocAppend (DocOrderedList (ds1 <> ds2)) d
    docAppend (DocUnorderedList ds1) (DocUnorderedList ds2) = DocUnorderedList (ds1 <> ds2)
    docAppend (DocUnorderedList ds1) (DocAppend (DocUnorderedList ds2) d) = DocAppend (DocUnorderedList (ds1 <> ds2)) d
    docAppend DocEmpty d = d
    docAppend d DocEmpty = d
    docAppend (DocString s1) (DocString s2) = DocString (s1 <> s2)
    docAppend (DocAppend d (DocString s1)) (DocString s2) = DocAppend d (DocString (s1 <> s2))
    docAppend (DocString s1) (DocAppend (DocString s2) d) = DocAppend (DocString (s1 <> s2)) d
    docAppend d1 d2 = DocAppend d1 d2

{- | Parse a text paragraph. Actually just a wrapper over 'parseAll' which
drops leading whitespace.
-}
parseText :: Text -> DocMarkup mod Identifier
parseText = parseAll . Text.dropWhile Char.isSpace . Text.filter (/= '\r')

parseAll :: Text -> DocMarkup mod Identifier
parseAll = snd . parse myParser
  where
    -- docConcat
    --     <$> many
    --         ( choice'
    --             [ monospace
    --             , anchor
    --             , identifier
    --             , moduleName
    --             , picture
    --             , mathDisplay
    --             , mathInline
    --             , markdownImage
    --             , markdownLink
    --             , hyperlink
    --             , bold
    --             , emphasis
    --             , encodedChar
    --             , string'
    --             , skipSpecialChar
    --             ]
    --         )
    myParser :: Parser (DocMarkup mod Identifier)
    myParser =
        docConcat
            <$> many
                ( choice'
                    [ monospace
                    , anchor
                    , identifier
                    , moduleName
                    , bold
                    , emphasis
                    , string'
                    , skipSpecialChar
                    ]
                )

choice' :: [Parser a] -> Parser a
choice' [] = empty
choice' [p] = p
choice' (p : ps) = Parsec.try p <|> choice' ps

disallowNewline :: Parser Text -> Parser Text
disallowNewline = mfilter (Text.all (/= '\n'))
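
The `docAppend` cases above are easiest to read as rewrite rules. A few concrete instances, written as equational notes rather than runnable calls since `docConcat` is internal to the module:

```haskell
-- Inside Parser.hs, docConcat = foldr docAppend DocEmpty, and docAppend
-- collapses neighbours instead of always nesting them in DocAppend:
--
--   docConcat [DocString "foo", DocString "bar"]
--     == DocString "foobar"
--   docConcat [DocUnorderedList [a], DocUnorderedList [b]]
--     == DocUnorderedList [a, b]
--   docConcat [DocEmpty, d]
--     == d
```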

65 src/Parser/Util.hs Normal file
@@ -0,0 +1,65 @@
module Parser.Util where

import Control.Monad (mfilter)
import Data.Functor (($>))
import Data.Text (Text)
import Data.Text qualified as Text
import ParserMonad (Parser)
import Text.Parsec (State (..))
import Text.Parsec qualified as Parsec
import Text.Parsec.Pos (updatePosChar)

{- | Consume characters from the input up to and including the given pattern.
Return everything consumed except for the end pattern itself.
-}
takeUntil :: Text -> Parser Text
takeUntil end_ = Text.dropEnd (Text.length end_) <$> requireEnd (scan p (False, end)) >>= gotSome
  where
    end = Text.unpack end_

    p :: (Bool, String) -> Char -> Maybe (Bool, String)
    p acc c = case acc of
        (True, _) -> Just (False, end)
        (_, []) -> Nothing
        (_, x : xs) | x == c -> Just (False, xs)
        _ -> Just (c == '\\', end)

    requireEnd = mfilter (Text.isSuffixOf end_)

    gotSome xs
        | Text.null xs = fail "didn't get any content"
        | otherwise = return xs

-- | Like `takeWhile`, but unconditionally take escaped characters.
takeWhile_ :: (Char -> Bool) -> Parser Text
takeWhile_ p = scan p_ False
  where
    p_ escaped c
        | escaped = Just False
        | not $ p c = Nothing
        | otherwise = Just (c == '\\')

-- | Like 'takeWhile1', but unconditionally take escaped characters.
takeWhile1_ :: (Char -> Bool) -> Parser Text
takeWhile1_ = mfilter (not . Text.null) . takeWhile_

{- | Scan the input text, accumulating characters as long as the scanning
function returns true.
-}
scan ::
    -- | scan function
    (state -> Char -> Maybe state) ->
    -- | initial state
    state ->
    Parser Text
scan f initState = do
    parserState@State{stateInput = input, statePos = pos} <- Parsec.getParserState
    (remaining, finalPos, ct) <- go input initState pos 0
    let newState = parserState{stateInput = remaining, statePos = finalPos}
    Parsec.setParserState newState $> Text.take ct input
  where
    go !input' !st !posAccum !count' = case Text.uncons input' of
        Nothing -> pure (input', posAccum, count')
        Just (char', input'') -> case f st char' of
            Nothing -> pure (input', posAccum, count')
            Just st' -> go input'' st' (updatePosChar posAccum char') (count' + 1)
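
A hedged example of what `takeUntil` is after, based on my trace of the scanner state machine rather than an asserted behaviour of this commit:

```haskell
import Data.Text qualified as Text
import Parser.Util (takeUntil)
import ParserMonad (initialParserState)
import Text.Parsec qualified as Parsec

-- Consume up to and including "__", returning everything before it.
untilDemo :: Either Parsec.ParseError Text.Text
untilDemo =
    Parsec.runParser (takeUntil (Text.pack "__")) initialParserState "<demo>" (Text.pack "foo__ rest")
-- expected: Right "foo"; a backslash-escaped delimiter should not end the span early.
```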

26 src/ParserMonad.hs Normal file
@@ -0,0 +1,26 @@
module ParserMonad where

import Data.String
import Data.Text (Text)
import Data.Text qualified as Text
import Text.Parsec (Parsec)
import Text.Parsec qualified as Parsec

import Types

type Parser = Parsec Text ParserState

instance (a ~ Text) => IsString (Parser a) where
    fromString = fmap Text.pack . Parsec.string

{- | The only bit of information we really care about trudging along with us
through parsing is the version attached to a @\@since@ annotation - if
the doc even contained one.
-}
newtype ParserState = ParserState
    { since :: Maybe Since
    }
    deriving (Eq, Show)

initialParserState :: ParserState
initialParserState = ParserState Nothing

196 src/Types.hs Normal file
@@ -0,0 +1,196 @@
module Types (
    DocMarkup (..),
    Document (..),
    Meta (..),
    ModuleLink (..),
    Package,
    Since (..),
    Version,
)
where

newtype Document = Document
    { meta :: Meta
    }
    deriving (Eq, Show)

newtype Meta = Meta
    { since :: Maybe Since
    }
    deriving (Eq, Show)

data Since = Since
    { package :: Maybe Package
    -- ^ optional package qualification
    , version :: Version
    }
    deriving (Eq, Show)

-- Could have a better type?
type Version = [Int]
type Package = String

data DocMarkup mod id
    = DocEmpty
    | -- | This is not represented in the markup language, this is for internal use
      DocAppend (DocMarkup mod id) (DocMarkup mod id)
    | -- | Any text that doesn't match any rules is a bare string
      DocString String
    | -- | Paragraphs are demarcated by blank lines
      DocParagraph (DocMarkup mod id)
    | -- | A haskell identifier
      DocIdentifier id
    | -- | A qualified identifier that couldn't be resolved.
      DocIdentifierUnchecked
    | -- | A link to a module, might include a label
      DocModule (ModuleLink (DocMarkup mod id))
    | -- | Emphasis /italics/
      DocEmphasis (DocMarkup mod id)
    | -- | Monospaced @source code@
      DocMonospace (DocMarkup mod id)
    | -- | Bold __bold text__
      DocBold (DocMarkup mod id)
    | {- | Unordered lists
        * this
        or
        - this
      -}
      DocUnorderedList [DocMarkup mod id]
    | {- | Ordered lists
        1. this
        or
        (1) this
      -}
      DocOrderedList [(Int, DocMarkup mod id)]
    | {- | Definition lists
        [term] a term
        [another term] another definition
      -}
      DocDefinitionList [(DocMarkup mod id, DocMarkup mod id)]
    | {- | Code blocks
        @
        a code block in here
        with multiple lines
        @

        Or with bird tracks:
        > some code
        > goes here
      -}
      DocCodeBlock (DocMarkup mod id)
    | {- | Hyperlinks
        __marked__:
        <http://example.com>
        <http://example.com label text>
        __Auto-detected URLs__:
        http://example.com
        https://example.com
        ftp://example.com
        __Markdown style__
        [link text](http://example.com)
        [link text]("Module.Name")
      -}
      DocHyperlink (Hyperlink (DocMarkup mod id))
    | {- | Pictures
        <<image.png>>
        <<image.png title text>>

        __Markdown Images__

        
      -}
      DocPicture Picture
    | {- | Inline math expressions
        \(mathematical expression\)
      -}
      DocMathInline String
    | {- | Math multiline display
        \[
        mathematical expression
        in multiple lines
        \]
      -}
      DocMathDisplay String
    | {- | Anchors, no spaces allowed
        #anchor-name#
      -}
      DocAnchor String
    | {- | Property descriptions
        prop> property description
      -}
      DocProperty String
    | {- | Examples
        >>> expression
        result line 1
        result line 2
      -}
      DocExamples [Example]
    | -- | Header
      DocHeader (Header (DocMarkup mod id))
    | -- Table
      DocTable (Table (DocMarkup mod id))
    deriving (Eq, Show)

instance Semigroup (DocMarkup mod id) where
    (<>) = DocAppend

instance Monoid (DocMarkup mod id) where
    mempty = DocEmpty
    mconcat = foldr (<>) mempty

data ModuleLink id = ModuleLink
    { name :: String
    , label :: Maybe id
    }
    deriving (Eq, Show)

data Picture = Picture
    { uri :: String
    , title :: Maybe String
    }
    deriving (Eq, Show)

data Hyperlink id = Hyperlink
    { url :: String
    , label :: Maybe id
    }
    deriving (Eq, Show, Functor, Foldable, Traversable)

data TableCell id = TableCell
    { col :: Int
    , row :: Int
    , content :: id
    }
    deriving (Eq, Show, Functor, Foldable, Traversable)

newtype TableRow id = TableRow
    { rows :: [TableCell id]
    }
    deriving (Eq, Show, Functor, Foldable, Traversable)

data Table id = Table
    { headerRows :: [TableRow id]
    , bodyRows :: [TableRow id]
    }
    deriving (Eq, Show, Functor, Foldable, Traversable)

data Example = Example
    { exampleExpression :: String
    , exampleResult :: [String]
    }
    deriving (Eq, Show)

data Header id = Header
    { level :: HeaderLevel
    , title :: id
    }
    deriving (Eq, Show)

data HeaderLevel
    = H1
    | H2
    | H3
    | H4
    | H5
    | H6
    deriving (Eq, Show, Bounded, Enum)
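
The Semigroup/Monoid instances make it easy to assemble documents by hand. A small sketch (not part of the commit); it sticks to `DocMarkup` constructors, since those are what the module currently exports:

```haskell
import Types

-- (<>) is DocAppend and mempty is DocEmpty, per the instances above.
handBuilt :: DocMarkup () String
handBuilt =
    DocParagraph (DocString "Hello, " <> DocBold (DocString "world"))
        <> DocUnorderedList [DocString "item 1", DocString "item 2"]
```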

34 test/Spec.hs Normal file
@@ -0,0 +1,34 @@
{-# LANGUAGE OverloadedStrings #-}
{-# OPTIONS_GHC -Wno-orphans #-}

import Test.Hspec

import Data.String (IsString (..))
import Data.Text (Text)

import Identifier (Identifier)
import Lexer
import Parser
import Types

main :: IO ()
main = hspec $ do
    describe "Lexer" do
        it "lexes" do
            lexer "This is string" `shouldBe` undefined
    describe "Parser" do
        it "Bold" do
            "__bold__" `shouldParseTo` (DocBold (DocString "bold"))
        it "Emphasis" do
            "/emphasis/" `shouldParseTo` (DocEmphasis (DocString "emphasis"))

shouldParseTo :: Text -> DocMarkup mod Identifier -> Expectation
shouldParseTo input ast = parseText input `shouldBe` ast

type Doc id = DocMarkup () id

instance IsString (Doc String) where
    fromString = DocString

file :: IO String
file = readFile "test/markup.md"

89 test/markup.md Normal file
@@ -0,0 +1,89 @@
/emphasized text/

__bold text__

@monospace text@

"Module.Name"
"Module.Name#anchor"
"Module.Name\#anchor"

<http://example.com>
<http://example.com label text>

http://example.com
https://example.com
ftp://example.com

[link text](http://example.com)
[link text]("Module.Name")

#anchor-name#

'identifier'

<<image.png>>
<<image.png title text>>



\(mathematical expression\)
\[mathematical expression\]

@
code block content
with multiple lines
@

> code line 1
> code line 2

>>> expression
result line 1
result line 2

>>> another expression
result

prop> property description

* item 1
* item 2
    continued content

- item 1
- item 2

1. item 1
2. item 2

(1) item 1
(2) item 2

[term] definition content
[another term] more definition content

+----------+----------+
| Header 1 | Header 2 |
+==========+==========+
| Cell 1   | Cell 2   |
+----------+----------+
| Cell 3   | Cell 4   |
+----------+----------+

= Level 1 Header
== Level 2 Header
=== Level 3 Header
==== Level 4 Header
===== Level 5 Header
====== Level 6 Header

@since package-name-1.2.3
@since 1.2.3

A (decimal)
A (hexadecimal)
A (hexadecimal)


This \@escapes\@ at signs