gpt4 book ai didi

parsing - 使用 Megaparsec 缩进

转载 作者:行者123 更新时间:2023-12-04 15:18:13 29 4
gpt4 key购买 nike

我想使用 Megaparsec 解析一种基本的缩进语言。最初我使用的是 Parsec,我设法通过缩进正常工作,但现在我遇到了一些麻烦。

我一直在关注一个教程here这是我必须解析一种忽略缩进的语言的代码。

module Parser where

import Data.Functor ((<$>), (<$))
import Control.Applicative (Applicative(..))
import qualified Control.Monad as M
import Control.Monad (void)
import Data.Functor.Identity
import Data.Text (Text)
import qualified Data.Text as Text

import Data.Void

import Text.Megaparsec
import Text.Megaparsec.Char
import Text.Megaparsec.Perm
import Text.Megaparsec.Expr
import qualified Text.Megaparsec.Char.Lexer as L

import Text.Pretty.Simple
import Data.Either.Unwrap

--import Lexer
import Syntax

type Parser = Parsec Void String

lineComment :: Parser ()
lineComment = L.skipLineComment "#"

scn :: Parser ()
scn = L.space space1 lineComment empty

sc :: Parser () -- ‘sc’ stands for “space consumer”
sc = L.space (void $ takeWhile1P Nothing f) lineComment empty
where
f x = x == ' ' || x == '\t'

lexeme :: Parser a -> Parser a
lexeme = L.lexeme sc

symbol :: String -> Parser String
symbol = L.symbol sc

integer :: Parser Integer
integer = lexeme L.decimal


semi :: Parser String
semi = symbol ";"

rword :: String -> Parser ()
rword w = lexeme (string w *> notFollowedBy alphaNumChar)

rws :: [String] -- list of reserved words
rws = ["if","then","else","while","do","skip","true","false","not","and","or"]

identifier :: Parser String
identifier = (lexeme . try) (p >>= check)
where
p = (:) <$> letterChar <*> many alphaNumChar
check x = if x `elem` rws
then fail $ "keyword " ++ show x ++ " cannot be an identifier"
else return x


parens :: Parser a -> Parser a
parens = between (symbol "(") (symbol ")")


whileParser :: Parser Stmt
whileParser = between sc eof stmt

stmt :: Parser Stmt
stmt = f <$> sepBy1 stmt' semi
where
-- if there's only one stmt return it without using ‘Seq’
f l = if length l == 1 then head l else Seq l

stmt' :: Parser Stmt
stmt' = ifStmt
<|> whileStmt
<|> skipStmt
<|> assignStmt
<|> parens stmt

ifStmt :: Parser Stmt
ifStmt = do
rword "if"
cond <- bExpr
rword "then"
stmt1 <- stmt
rword "else"
stmt2 <- stmt
return (If cond stmt1 stmt2)

whileStmt :: Parser Stmt
whileStmt = do
rword "while"
cond <- bExpr
rword "do"
stmt1 <- stmt
return (While cond stmt1)

assignStmt :: Parser Stmt
assignStmt = do
var <- identifier
void (symbol ":=")
expr <- aExpr
return (Assign var expr)

skipStmt :: Parser Stmt
skipStmt = Skip <$ rword "skip"

aExpr :: Parser AExpr
aExpr = makeExprParser aTerm aOperators

bExpr :: Parser BExpr
bExpr = makeExprParser bTerm bOperators

aOperators :: [[Operator Parser AExpr]]
aOperators =
[ [Prefix (Neg <$ symbol "-") ]
, [ InfixL (ABinary Multiply <$ symbol "*")
, InfixL (ABinary Divide <$ symbol "/") ]
, [ InfixL (ABinary Add <$ symbol "+")
, InfixL (ABinary Subtract <$ symbol "-") ]
]

bOperators :: [[Operator Parser BExpr]]
bOperators =
[ [Prefix (Not <$ rword "not") ]
, [InfixL (BBinary And <$ rword "and")
, InfixL (BBinary Or <$ rword "or") ]
]

aTerm :: Parser AExpr
aTerm = parens aExpr
<|> Var <$> identifier
<|> IntConst <$> integer

bTerm :: Parser BExpr
bTerm = parens bExpr
<|> (BoolConst True <$ rword "true")
<|> (BoolConst False <$ rword "false")
<|> rExpr

rExpr :: Parser BExpr
rExpr = do
a1 <- aExpr
op <- relation
a2 <- aExpr
return (RBinary op a1 a2)

relation :: Parser RBinOp
relation = (symbol ">" *> pure Greater)
<|> (symbol "<" *> pure Less)

parsePrint :: String -> IO()
parsePrint s = do
parseTest stmt' s

运行此解析正确。
parsePrint $ unlines
[ "while (true) do if(false) then x := 5 else y := 20"
]

这是第二篇教程 here 中解析缩进的代码.
{-# LANGUAGE TupleSections #-}

module Main where

import Control.Applicative (empty)
import Control.Monad (void)
import Data.Void
import Data.Char (isAlphaNum)
import Text.Megaparsec
import Text.Megaparsec.Char
import qualified Text.Megaparsec.Char.Lexer as L

type Parser = Parsec Void String

lineComment :: Parser ()
lineComment = L.skipLineComment "#"

scn :: Parser ()
scn = L.space space1 lineComment empty

sc :: Parser ()
sc = L.space (void $ takeWhile1P Nothing f) lineComment empty
where
f x = x == ' ' || x == '\t'

lexeme :: Parser a -> Parser a
lexeme = L.lexeme sc

pItem :: Parser String
pItem = lexeme (takeWhile1P Nothing f) <?> "list item"
where
f x = isAlphaNum x || x == '-'

pComplexItem :: Parser (String, [String])
pComplexItem = L.indentBlock scn p
where
p = do
header <- pItem
return (L.IndentMany Nothing (return . (header, )) pLineFold)

pLineFold :: Parser String
pLineFold = L.lineFold scn $ \sc' ->
let ps = takeWhile1P Nothing f `sepBy1` try sc'
f x = isAlphaNum x || x == '-'
in unwords <$> ps <* sc

pItemList :: Parser (String, [(String, [String])])
pItemList = L.nonIndented scn (L.indentBlock scn p)
where
p = do
header <- pItem
return (L.IndentSome Nothing (return . (header, )) pComplexItem)

parser :: Parser (String, [(String, [String])])
parser = pItemList <* eof

main :: IO ()
main = return ()

我想作为一个正确解析的例子。
parsePrint $ unlines
[ "while (true) do"
, " if(false) then x := 5 else y := 20"
]

我怎样才能正确解析缩进?还有其他地方有关于使用 Megaparsec 的教程/文档吗?

最佳答案

在过去的几周里花了很多时间在这件事上之后,我设法解决了这个问题。这是从使用字符串转移到使用我自己的“Expr”数据类型的问题。
对于任何想开始编写缩进语言的人来说,这段代码可能是一个好的开始!

解析器

{-# LANGUAGE TupleSections #-}

module IndentTest where

import Control.Applicative (empty)
import Control.Monad (void)
import Data.Void
import Data.Char (isAlphaNum)
import Text.Megaparsec
import Text.Megaparsec.Char
import qualified Text.Megaparsec.Char.Lexer as L
import Text.Megaparsec.Expr

import Block

type Parser = Parsec Void String

-- Tokens


lineComment :: Parser ()
lineComment = L.skipLineComment "#"


scn :: Parser ()
scn = L.space space1 lineComment empty


sc :: Parser ()
sc = L.space (void $ takeWhile1P Nothing f) lineComment empty
where
f x = x == ' ' || x == '\t'


symbol :: String -> Parser String
symbol = L.symbol sc


rword :: String -> Parser ()
rword w = lexeme (string w *> notFollowedBy alphaNumChar)


rws :: [String] -- list of reserved words
rws = ["module", "println", "import", "let", "if","then","else","while","do","skip","true","false","not","and","or"]


word :: Parser String
word = (lexeme . try) (p >>= check)
where
p = (:) <$> alphaNumChar <*> many alphaNumChar
check x = if x `elem` rws
then fail $ "keyword " ++ show x ++ " cannot be an word"
else return x

lexeme :: Parser a -> Parser a
lexeme = L.lexeme sc


integer :: Parser Integer
integer = lexeme L.decimal


parens :: Parser a -> Parser a
parens = between (symbol "(") (symbol ")")


aTerm :: Parser AExpr
aTerm = parens aExpr
<|> Var <$> identifier
<|> IntConst <$> integer


aOperators :: [[Operator Parser AExpr]]
aOperators =
[ [Prefix (Neg <$ symbol "-") ]
, [ InfixL (ABinary Multiply <$ symbol "*")
, InfixL (ABinary Divide <$ symbol "/") ]
, [ InfixL (ABinary Add <$ symbol "+")
, InfixL (ABinary Subtract <$ symbol "-") ]
]


aExpr :: Parser AExpr
aExpr = makeExprParser aTerm aOperators


assignArith :: Parser Expr
assignArith = do
var <- identifier
symbol ":"
vType <- valType
symbol "="
e <- aExpr
return $ AssignArith vType var e


bTerm :: Parser BExpr
bTerm = parens bExpr
<|> (BoolConst True <$ rword "true")
<|> (BoolConst False <$ rword "false")
<|> rExpr


bOperators :: [[Operator Parser BExpr]]
bOperators =
[ [Prefix (Not <$ rword "not") ]
, [InfixL (BBinary And <$ rword "and")
, InfixL (BBinary Or <$ rword "or") ]
]


bExpr :: Parser BExpr
bExpr = makeExprParser bTerm bOperators


rExpr :: Parser BExpr
rExpr = do
a1 <- aExpr
op <- relation
a2 <- aExpr
return (RBinary op a1 a2)


relation :: Parser RBinOp
relation = (symbol ">" *> pure Greater)
<|> (symbol "<" *> pure Less)


identifier :: Parser String
identifier = (lexeme . try) (p >>= check)
where
p = (:) <$> letterChar <*> many alphaNumChar
check x = if x `elem` rws
then fail $ "keyword " ++ show x ++ " cannot be an identifier"
else return x


stringLiteral :: Parser Expr
stringLiteral = do
value <- char '"' >> manyTill L.charLiteral (char '"')
symbol ";"
return $ StringLiteral value


assignString :: Parser Expr
assignString = do
var <- identifier
symbol ":"
vType <- valType
symbol "="
e <- stringLiteral
return (AssignString vType var e)


arrayDef :: Parser Expr
arrayDef = do
name <- identifier
symbol ":"

symbol "["
arrType <- word
symbol "]"

symbol "="
return $ ArrayDef arrType name

arrayValues :: Parser Expr
arrayValues = do
symbol "["
values <- many identifier
symbol "]"
return $ ArrayValues values

arrayAssign :: Parser Expr
arrayAssign = do
def <- arrayDef
values <- arrayValues
return $ ArrayAssignment def values

arrayElementSelect :: Parser Expr
arrayElementSelect = do
symbol "!!"
elementNum <- word
return $ ArrayElementSelect elementNum


moduleParser :: Parser Expr
moduleParser = L.nonIndented scn (L.indentBlock scn p)
where
p = do
rword "module"
name <- identifier
return (L.IndentSome Nothing (return . (Module name)) expr')


valType :: Parser Expr
valType = do
value <- identifier
return $ Type value


argumentType :: Parser Expr
argumentType = do
value <- identifier
return $ ArgumentType value


returnType :: Parser Expr
returnType = do
value <- identifier
return $ ReturnType value


argument :: Parser Expr
argument = do
value <- identifier
return $ Argument value


-- Function parser
functionParser :: Parser Expr
functionParser = L.indentBlock scn p
where
p = do
name <- identifier
symbol ":"
argTypes <- some argumentType
symbol "->"
rType <- IndentTest.returnType
nameDup <- L.lineFold scn $ \sp' ->
(identifier) `sepBy1` try sp' <* scn
args <- many argument
symbol "="
if(name == "main") then
return (L.IndentMany Nothing (return . (MainFunction name argTypes args rType)) expr')
else
return (L.IndentMany Nothing (return . (Function name argTypes args rType)) expr')



functionCallParser :: Parser Expr
functionCallParser = do
name <- identifier
args <- parens $ many argument
return $ FunctionCall name args


printParser :: Parser Expr
printParser = do
rword "println"
bodyArr <- identifier
symbol ";"
return $ Print bodyArr


valueToken :: Parser String
valueToken = lexeme (takeWhile1P Nothing f) <?> "list item"
where
f x = isAlphaNum x || x == '-'


ifStmt :: Parser Expr
ifStmt = L.indentBlock scn p
where
p = do
rword "if"
cond <- bExpr
return (L.IndentMany Nothing (return . (If cond)) expr')

elseStmt :: Parser Expr
elseStmt = L.indentBlock scn p
where
p = do
rword "else"
return (L.IndentMany Nothing (return . (Else)) expr')

whereStmt :: Parser Expr
whereStmt = do
rword "where"
symbol "{"
exprs <- many expr
symbol "}"
return $ (Where exprs)


expr :: Parser Expr
expr = f <$> sepBy1 expr' (symbol ";")
where
-- if there's only one expr return it without using ‘Seq’
f l = if length l == 1 then head l else Seq l


expr' :: Parser Expr
expr' = try moduleParser
<|> try functionParser
<|> try ifStmt
<|> try elseStmt
<|> try arrayAssign
<|> arrayElementSelect
<|> try assignArith
<|> try functionCallParser
<|> try assignString
<|> try printParser
<|> try whereStmt
<|> try stringLiteral


parser :: Parser Expr
parser = expr'


parseFromFile file = runParser expr file <$> readFile file


parseString input =
case parse expr' "" input of
Left e -> show e
Right x -> show x


parsePrint :: String -> IO()
parsePrint s = parseTest' parser s

block /Expr - AST 由这个组成
module Block where

import Data.List
import Text.Show.Functions
import Data.Char
import Data.Maybe

-- Boolean expressions
data BExpr
= BoolConst Bool
| Not BExpr
| BBinary BBinOp BExpr BExpr
| RBinary RBinOp AExpr AExpr

instance Show BExpr where
show (BoolConst b) = lowerString $ show b
show (Not n) = show n
show (BBinary bbinop bExpr1 bExpr2) = show bExpr1 ++ " " ++ show bbinop ++ " " ++ show bExpr2
show (RBinary rbinop aExpr1 aExpr2) = show aExpr1 ++ " " ++ show rbinop ++ " " ++ show aExpr2


-- Boolean ops
data BBinOp
= And
| Or

instance Show BBinOp where
show (And) = "&&"
show (Or) = "||"

-- R binary ops
data RBinOp
= Greater
| Less

instance Show RBinOp where
show (Greater) = ">"
show (Less) = "<"

-- Arithmetic expressions
data AExpr
= Var String
| IntConst Integer
| Neg AExpr
| ABinary ABinOp AExpr AExpr
| Parenthesis AExpr

instance Show AExpr where
show (Var v) = v
show (IntConst i) = show i
show (Neg aExpr) = "-" ++ show aExpr
show (ABinary aBinOp aExpr1 aExpr2) = show aExpr1 ++ " " ++ show aBinOp ++ " " ++ show aExpr2
show (Parenthesis aExpr) = "(" ++ show aExpr ++ ")"

-- Arithmetic ops
data ABinOp
= OpeningParenthesis
| ClosingParenthesis
| Add
| Subtract
| Multiply
| Divide

instance Show ABinOp where
show (Add) = "+"
show (Subtract) = "-"
show (Multiply) = "*"
show (Divide) = "/"
show (OpeningParenthesis) = "("
show (ClosingParenthesis) = ")"

-- Statements
data Expr
= Seq [Expr]
| Module String [Expr]
| Import String String
| MainFunction {name ::String, argTypes:: [Expr], args::[Expr], returnType::Expr, body::[Expr]}
| Function String [Expr] [Expr] Expr [Expr]
| FunctionCall String [Expr]
| Type String
| ValueType String
| Argument String
| ArgumentType String
| ReturnType String
| AssignArith Expr String AExpr
| AssignString Expr String Expr
| If BExpr [Expr]
| Else [Expr]
| While BExpr [Expr]
| Print String
| Return Expr
| ArrayValues [String]
| ArrayDef String String
| ArrayAssignment Expr Expr
| ArrayElementSelect String
| Lambda String String
| Where [Expr]
| StringLiteral String
| Skip

instance Show Expr where
show (Module name bodyArray) =
-- Get the main function tree

"public class " ++ name ++ "{\n" ++
"public static void main(String[] args){\n" ++
name ++ " " ++ lowerString name ++ "= new " ++ name ++ "();\n" ++
intercalate "\n" (map (\mStatement -> if(isFunctionCall mStatement) then (lowerString name ++ "." ++ show mStatement) else show mStatement) (body ((filter (isMainFunction) bodyArray)!!0))) ++
"}\n" ++
getFunctionString bodyArray ++
"}\n"

show (Import directory moduleName) = "import " ++ directory ++ moduleName
show (Function name argTypes args returnType body) = "public " ++ show returnType ++ " " ++ name ++ "("++ intercalate ", " (zipWith (\x y -> x ++ " " ++ y) (map show argTypes) (map show args)) ++"){\n" ++ intercalate "\n" (map show body) ++ "}"
show (MainFunction name argTypes args returnType body) =
intercalate "\n " $ map show body
show (FunctionCall name exprs) = name ++ "(" ++ (intercalate ", " (map show exprs)) ++ ");"
show (Type b) = b
show (Argument b) = b
show (ArgumentType b) = b
show (ReturnType b) = b
show (AssignArith vType name value) = "" ++ show vType ++ " " ++ name ++ "=" ++ show value ++ ";"
show (AssignString vType name value) = "" ++ show vType ++ " " ++ name ++ "=" ++ show value ++ ";"
show (If condition statement) = "if(" ++ show condition ++ "){\n" ++ intercalate "\n" (map show statement) ++ "}"
show (Else statement) = " else {\n" ++ intercalate "\n" (map show statement) ++ "}"
show (While condition statement) = "while(" ++ show condition ++ "){\n" ++ intercalate "\n" (map show statement) ++ "}"
show (Skip) = "[skip]"
show (Seq s) = "[seq]"
show (Return expr) = "return " ++ show expr ++ ";"
show (Print exprs) = "System.out.println(" ++ exprs ++ ");" --"System.out.println(" ++ intercalate " " (map show exprs) ++ ");"
show (ArrayDef arrType name) = arrType ++ "[] " ++ name ++ "="
show (ArrayValues exprs) = "{" ++ intercalate ", " exprs ++ "};"
show (ArrayAssignment arr values) = show arr ++ show values
show (ArrayElementSelect i) = "[" ++ i ++ "];"
show (Lambda valName collectionName) = ""
show (Where exprs) = intercalate "\n" (map show exprs)
show (StringLiteral value) = "\"" ++ value ++ "\""
show (_) = "<unknown>"

lowerString str = [ toLower loweredString | loweredString <- str]

extractMain :: Expr -> Maybe String
extractMain (MainFunction m _ _ _ _) = Just m
extractMain _ = Nothing

extractFunctionCall :: Expr -> Maybe String
extractFunctionCall (FunctionCall m _) = Just m
extractFunctionCall _ = Nothing

isMainFunction :: Expr -> Bool
isMainFunction e = isJust $ extractMain e

isFunctionCall :: Expr -> Bool
isFunctionCall e = isJust $ extractFunctionCall e

{--
getInnerMainFunctionString :: [Expr] -> String -> String
getInnerMainFunctionString e instanceName = do
if(isMainFunction (e!!0)) then
show (e!!0)
else
getInnerMainFunctionString (drop 1 e) instanceName
--}
getFunctionString :: [Expr] -> String
getFunctionString e = do
if(isMainFunction (e!!0)) then
""
else
"" ++ show (e!!0) ++ getFunctionString (drop 1 e)

代码示例
module IndentationTest
testFunction : int -> void
testFunction x =
if(x < 50)
println x;
nextX :int = x + 1 * 2 - 3 / 2 + 5
testFunction (nextX)
else
last :int = 1000
println last;

main : String -> IO
main args =
x :int = 3
y :int = 10
z :int = 15
arrTest:[int] = [x y z]
println arrTest;
testFunction (x)
stringTest :String = "Helloworld";

这将成功解析示例代码。只需将其传递给 parsePrint 函数。

关于parsing - 使用 Megaparsec 缩进,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/48271208/

29 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com