diff --git a/src/Lexer.hs b/src/Lexer.hs index a4b00bb..89bafc5 100644 --- a/src/Lexer.hs +++ b/src/Lexer.hs @@ -56,7 +56,7 @@ data Token | MathsBracketOpen | MathsBracketClose | NumericEntity Int - | Module + | Module Text | QuoteOpen | QuoteClose | Space @@ -137,14 +137,14 @@ delimitedAsTuple openP closeP = <*> located closeP delimited :: Parser a -> Parser b -> Token -> Token -> Parser [LocatedToken] -delimited openP closeP openTok closeTok = fuse <$> delimitedAsTuple (openTok <$ openP) (closeTok <$ closeP) +delimited openP closeP openTok closeTok = asList <$> delimitedAsTuple (openTok <$ openP) (closeTok <$ closeP) where - fuse (a, tok, b) = [a, tok, b] + asList (a, tok, b) = [a, tok, b] delimitedNoTrailing :: Parser a -> Parser b -> Token -> Parser [LocatedToken] -delimitedNoTrailing openP closeP openTok = fuse <$> delimitedAsTuple (openTok <$ openP) (void closeP) +delimitedNoTrailing openP closeP openTok = asList <$> delimitedAsTuple (openTok <$ openP) (void closeP) where - fuse (a, tok, _) = [a, tok] + asList (a, tok, _) = [a, tok] delimitedSymmetric :: Parser a -> Token -> Token -> Parser [LocatedToken] delimitedSymmetric s t1 t2 = delimited s s t1 t2 @@ -152,6 +152,9 @@ delimitedSymmetric s t1 t2 = delimited s s t1 t2 eol :: Parser () eol = void "\n" <|> void "\r\n" <|> Parsec.eof +anchorHash :: Parser Text +anchorHash = "#" <|> try "\\#" + header1 :: Lexer header1 = delimitedNoTrailing "= " eol (Header One) @@ -172,41 +175,36 @@ header6 = delimitedNoTrailing "====== " eol (Header Six) -- #anchors# anchors :: Lexer -anchors = do - pos <- getPosition - void $ try anchor' - txt <- anyUntil anchor' - void $ try anchor' +anchors = + tokenise + [ between anchorHash anchorHash (Anchor <$> anyUntil anchorHash) + ] - pure [(pos, Anchor txt)] - where - anchor' = (string "#" <|> string "\\#") + +moduleName :: Parser Text +moduleName = intercalate "." . fmap Text.pack <$> upperId `sepBy1` char '.' + +upperId :: Parser String +upperId = (:) <$> satisfy isUpper <*> many1 identifierChar + +identifierChar :: Parser Char +identifierChar = satisfy (\c -> isAlphaNum c || c == '_') -- "Module.Name" -- "Module.Name#anchor" -- "Module.Name\#anchor" -- this has been deprecated for 9 years, thanks Ben modules :: Lexer -modules = do - startPos <- startPosition $ char '"' - (modPos, modName) <- located modId - anch <- option [] do - anchPos <- startPosition (string "#" <|> string' "\\#") - txt <- Text.pack <$> many (satisfy (\c -> c /= '"' && not (isSpace c))) - pure [(anchPos, Anchor txt)] - - void $ char '"' - pure $ [(startPos, Module), (modPos, Token modName)] <> anch +modules = between (char '"') (char '"') inner where - modId = intercalate "." <$> (fmap Text.pack <$> (conId `sepBy1` (char '.'))) + inner = do + module_ <- located $ Module <$> moduleName + mAnchor <- optionMaybe (located $ anchorHash *> (Anchor <$> anchorText)) + pure $ case mAnchor of + Just anchor -> [module_, anchor] + Nothing -> [module_] - conId :: Parser String - conId = - (:) - <$> satisfy (\c -> isAlpha c && isUpper c) - <*> many1 conChar - - conChar :: Parser Char - conChar = satisfy (\c -> isAlphaNum c || c == '_') + anchorText :: Parser Text + anchorText = Text.pack <$> many (satisfy (\c -> c /= '"' && not (isSpace c))) linkRaw :: Lexer linkRaw = diff --git a/test/Spec.hs b/test/Spec.hs index 2545c20..8207b77 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -44,19 +44,16 @@ main = hspec $ do modules :: Expectation modules = do "\"MyModule.Name\"" - `shouldLexTo` [ (1, 1, Module) - , (1, 2, Token "MyModule.Name") + `shouldLexTo` [ (1, 2, Module "MyModule.Name") ] "\"OtherModule.Name#myAnchor\"" - `shouldLexTo` [ (1, 1, Module) - , (1, 2, Token "OtherModule.Name") + `shouldLexTo` [ (1, 2, Module "OtherModule.Name") , (1, 18, Anchor "myAnchor") ] "\"OtherModule.Name\\#myAnchor\"" - `shouldLexTo` [ (1, 1, Module) - , (1, 2, Token "OtherModule.Name") + `shouldLexTo` [ (1, 2, Module "OtherModule.Name") , (1, 18, Anchor "myAnchor") ] link :: Expectation