diff --git a/src/Lexer.hs b/src/Lexer.hs index 89bafc5..a4b00bb 100644 --- a/src/Lexer.hs +++ b/src/Lexer.hs @@ -56,7 +56,7 @@ data Token | MathsBracketOpen | MathsBracketClose | NumericEntity Int - | Module Text + | Module | QuoteOpen | QuoteClose | Space @@ -137,14 +137,14 @@ delimitedAsTuple openP closeP = <*> located closeP delimited :: Parser a -> Parser b -> Token -> Token -> Parser [LocatedToken] -delimited openP closeP openTok closeTok = asList <$> delimitedAsTuple (openTok <$ openP) (closeTok <$ closeP) +delimited openP closeP openTok closeTok = fuse <$> delimitedAsTuple (openTok <$ openP) (closeTok <$ closeP) where - asList (a, tok, b) = [a, tok, b] + fuse (a, tok, b) = [a, tok, b] delimitedNoTrailing :: Parser a -> Parser b -> Token -> Parser [LocatedToken] -delimitedNoTrailing openP closeP openTok = asList <$> delimitedAsTuple (openTok <$ openP) (void closeP) +delimitedNoTrailing openP closeP openTok = fuse <$> delimitedAsTuple (openTok <$ openP) (void closeP) where - asList (a, tok, _) = [a, tok] + fuse (a, tok, _) = [a, tok] delimitedSymmetric :: Parser a -> Token -> Token -> Parser [LocatedToken] delimitedSymmetric s t1 t2 = delimited s s t1 t2 @@ -152,9 +152,6 @@ delimitedSymmetric s t1 t2 = delimited s s t1 t2 eol :: Parser () eol = void "\n" <|> void "\r\n" <|> Parsec.eof -anchorHash :: Parser Text -anchorHash = "#" <|> try "\\#" - header1 :: Lexer header1 = delimitedNoTrailing "= " eol (Header One) @@ -175,36 +172,41 @@ header6 = delimitedNoTrailing "====== " eol (Header Six) -- #anchors# anchors :: Lexer -anchors = - tokenise - [ between anchorHash anchorHash (Anchor <$> anyUntil anchorHash) - ] +anchors = do + pos <- getPosition + void $ try anchor' + txt <- anyUntil anchor' + void $ try anchor' - -moduleName :: Parser Text -moduleName = intercalate "." . fmap Text.pack <$> upperId `sepBy1` char '.' - -upperId :: Parser String -upperId = (:) <$> satisfy isUpper <*> many1 identifierChar - -identifierChar :: Parser Char -identifierChar = satisfy (\c -> isAlphaNum c || c == '_') + pure [(pos, Anchor txt)] + where + anchor' = (string "#" <|> string "\\#") -- "Module.Name" -- "Module.Name#anchor" -- "Module.Name\#anchor" -- this has been deprecated for 9 years, thanks Ben modules :: Lexer -modules = between (char '"') (char '"') inner - where - inner = do - module_ <- located $ Module <$> moduleName - mAnchor <- optionMaybe (located $ anchorHash *> (Anchor <$> anchorText)) - pure $ case mAnchor of - Just anchor -> [module_, anchor] - Nothing -> [module_] +modules = do + startPos <- startPosition $ char '"' + (modPos, modName) <- located modId + anch <- option [] do + anchPos <- startPosition (string "#" <|> string' "\\#") + txt <- Text.pack <$> many (satisfy (\c -> c /= '"' && not (isSpace c))) + pure [(anchPos, Anchor txt)] - anchorText :: Parser Text - anchorText = Text.pack <$> many (satisfy (\c -> c /= '"' && not (isSpace c))) + void $ char '"' + pure $ [(startPos, Module), (modPos, Token modName)] <> anch + where + modId = intercalate "." <$> (fmap Text.pack <$> (conId `sepBy1` (char '.'))) + + conId :: Parser String + conId = + (:) + <$> satisfy (\c -> isAlpha c && isUpper c) + <*> many1 conChar + + conChar :: Parser Char + conChar = satisfy (\c -> isAlphaNum c || c == '_') linkRaw :: Lexer linkRaw = diff --git a/test/Spec.hs b/test/Spec.hs index 8207b77..2545c20 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -44,16 +44,19 @@ main = hspec $ do modules :: Expectation modules = do "\"MyModule.Name\"" - `shouldLexTo` [ (1, 2, Module "MyModule.Name") + `shouldLexTo` [ (1, 1, Module) + , (1, 2, Token "MyModule.Name") ] "\"OtherModule.Name#myAnchor\"" - `shouldLexTo` [ (1, 2, Module "OtherModule.Name") + `shouldLexTo` [ (1, 1, Module) + , (1, 2, Token "OtherModule.Name") , (1, 18, Anchor "myAnchor") ] "\"OtherModule.Name\\#myAnchor\"" - `shouldLexTo` [ (1, 2, Module "OtherModule.Name") + `shouldLexTo` [ (1, 1, Module) + , (1, 2, Token "OtherModule.Name") , (1, 18, Anchor "myAnchor") ] link :: Expectation