gpt4 book ai didi

c++ - Spirit 在仅出现从 Lexer 获得第一个符号后无法解析

转载 作者:行者123 更新时间:2023-11-30 03:22:39 31 4
gpt4 key购买 nike

最近在这里问了一个问题: Boost Spirit Segfault In Parser

在这篇文章中,有人指出我正在使用的语法是绝对左递归的,这种精神是 PEG 解析器生成器,这意味着左递归是不可能的。

我将语法转换为非左递归语法,使用龙书中处理左递归部分的规则。

给定一个左递归文法

A -> A >> alpha | beta

可以通过以下方式转换为等价的右递归文法:

A -> beta >> A'

A' -> alpha >> A' | epsilon

这是生成的解析器,我认为是非左递归产生式:

namespace interpreter {
namespace qi = boost::spirit::qi;
template <typename Iterator, typename Skipper>
struct InterpreterGrammar : qi::grammar<Iterator, Skipper>
{
template <typename TokenDef>
InterpreterGrammar(TokenDef const& tok)
: InterpreterGrammar::base_type(start)
{
using boost::phoenix::ref;

start %= functionList >> endList >> qi::eoi;

// different expressions
exp %=
qi::token(k_alphaTerminal) >> qi::token(k_equalTo) >> qi::token(k_alphaTerminal) >> expPrime
|
qi::token(k_numericTerminal) >> expPrime
|
qi::token(k_trueTok) >> expPrime
|
qi::token(k_falseTok) >> expPrime;

expPrime %=
qi::token(k_equalTo) >> exp >> expPrime
|
qi::token(k_notEq) >> exp >> expPrime
|
qi::token(k_less) >> exp >> expPrime
|
qi::token(k_lessEq) >> exp >> expPrime
|
qi::token(k_greater) >> exp >> expPrime
|
qi::token(k_greaterEq) >> exp >> expPrime
|
qi::token(k_andTok) >> exp >> expPrime
|
qi::token(k_orTok) >> exp >> expPrime
|
qi::token(k_notTok) >> exp
|
qi::token(k_plues) >> exp >> expPrime
|
qi::token(k_minus) >> exp >> expPrime
|
qi::token(k_mult) >> exp >> expPrime
|
qi::token(k_minus) >> exp
|
qi::token(k_leftParen) >> exp >> qi::token(k_rightParen)
|
qi::token(k_alphaTerminal) >> qi::token(k_leftBracket) >> exp >> qi::token(k_rightBracket)
|
qi::token(k_alphaTerminal) >> qi::token(k_leftParen) >> qi::token(k_rightParen)
|
qi::token(k_alphaTerminal) >> qi::token(k_leftParen) >> exp >> qi::token(k_rightParen)
|
qi::eps;

// parameter list
paramList %= exp >> paramListPrime;

paramListPrime %= qi::token(k_comma) >> exp >> paramListPrime
|
qi::eps;

// return statements
returnStatement %= qi::token(k_returnTok) >> exp
|
qi::token(k_returnTok);

// function call statements
callStatement %= qi::token(k_alphaTerminal) >> qi::token(k_leftParen) >> qi::token(k_rightParen)
|
qi::token(k_alphaTerminal) >> qi::token(k_leftParen) >> paramList >> qi::token(k_rightParen);

// variable assignment
assignmentStatement %= qi::token(k_alphaTerminal) >> qi::token(k_assign) >> exp
|
qi::token(k_alphaTerminal) >> qi::token(k_leftBracket) >> exp
>> qi::token(k_rightBracket) >> qi::token(k_assign) >> exp;

// list of integers
intList %= qi::token(k_numericTerminal) >> intListPrime;

intListPrime %=
qi::token(k_comma) >> qi::token(k_numericTerminal) >> intListPrime
|
qi::eps;

// print out a variable
printStatement %= qi::token(k_print) >> exp;

// take input
inputStatement %= qi::token(k_alphaTerminal) >> qi::token(k_input);

// conditional statement
conditionStatement %= qi::token(k_ifTok) >> exp >> qi::token(k_colon) >> statements >> optionalElse;

// consitions have optional else
optionalElse %= qi::token(k_elseTok) >> qi::token(k_colon) >> statements
|
qi::eps;

// while loop
whileStatement %= qi::token(k_whileTok) >> exp >> qi::token(k_colon) >> statements >> qi::token(k_elihw);

// actual program statements
endList %= end >> endListPrime;

endListPrime %= end >> endListPrime
|
qi::eps;

// end possibilities of program in global space
end %= callStatement
|
printStatement
|
qi::token(k_alphaTerminal) >> qi::token(k_assign) >> qi::token(k_input)
|
qi::token(k_alphaTerminal) >> qi::token(k_assign) >> exp
|
qi::token(k_alphaTerminal) >> qi::token(k_assign) >> qi::token(k_leftBracket) >> intList
>> qi::token(k_rightBracket)
|
qi::token(k_alphaTerminal) >> qi::token(k_leftBracket) >> exp >> qi::token(k_rightBracket)
>> qi::token(k_assign) >> exp;

// function parameters
param %=
qi::token(k_alphaTerminal) >> paramPrime
|
qi::token(k_alphaTerminal) >> qi::token(k_leftBracket) >> qi::token(k_rightBracket)
>> paramPrime;

// for handling left recursion in paramlist
paramPrime %=
qi::token(k_comma) >> qi::token(k_alphaTerminal) >> paramPrime
|
qi::eps;


// define a statement as assignment print input condition while or call
statement %=
assignmentStatement
|
printStatement
|
inputStatement
|
conditionStatement
|
whileStatement
|
callStatement
|
returnStatement;

// general statement list
statements %= statement >> statementsPrime;

// for handling left recursion in statements
statementsPrime %= statement >> statementsPrime
|
qi::eps;

// functions
functionList %= qi::token(k_def) >> qi::token(k_alphaTerminal) >> qi::token(k_leftParen)
>> param >> qi::token(k_rightParen) >> qi::token(k_colon)
>> statements >> qi::token(k_fed)
|
qi::token(k_def) >> qi::token(k_alphaTerminal) >> qi::token(k_leftParen)
>> qi::token(k_rightParen) >> qi::token(k_colon) >> statements >> qi::token(k_fed)
| qi::eps;

BOOST_SPIRIT_DEBUG_NODES((start)(functionList));
debug(start);
}
qi::rule<Iterator, Skipper> start;
qi::rule<Iterator, Skipper> functionList;
qi::rule<Iterator, Skipper> endList;
qi::rule<Iterator, Skipper> endListPrime;
qi::rule<Iterator, Skipper> param;
qi::rule<Iterator, Skipper> paramPrime;
qi::rule<Iterator, Skipper> paramList;
qi::rule<Iterator, Skipper> paramListPrime;
qi::rule<Iterator, Skipper> statements;
qi::rule<Iterator, Skipper> statementsPrime;
qi::rule<Iterator, Skipper> statement;
qi::rule<Iterator, Skipper> assignmentStatement;
qi::rule<Iterator, Skipper> printStatement;
qi::rule<Iterator, Skipper> inputStatement;
qi::rule<Iterator, Skipper> conditionStatement;
qi::rule<Iterator, Skipper> whileStatement;
qi::rule<Iterator, Skipper> callStatement;
qi::rule<Iterator, Skipper> returnStatement;
qi::rule<Iterator, Skipper> exp;
qi::rule<Iterator, Skipper> expPrime;
qi::rule<Iterator, Skipper> intList;
qi::rule<Iterator, Skipper> intListPrime;
qi::rule<Iterator, Skipper> optionalElse;
qi::rule<Iterator, Skipper> end;
};

这是词法分析器

#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_statement.hpp>
#include <boost/spirit/include/phoenix_container.hpp>
#include <iostream>
#include <fstream>
#include <streambuf>

#include <boost/bind.hpp>
#include <boost/ref.hpp>

namespace interpreter
{
namespace lex = boost::spirit::lex;

enum Tokens
{
k_andTok = 1,
k_def = 2,
k_elihw = 3,
k_elseTok = 4,
k_falseTok = 5,
k_fed = 6,
k_fi = 7,
k_ifTok = 8,
k_input = 9,
k_notTok = 10,
k_orTok = 11,
k_print = 12,
k_returnTok = 13,
k_trueTok = 14,
k_whileTok = 15,
k_plues = 16,
k_minus = 17,
k_mult = 18,
k_div = 19,
k_bang = 20,
k_equalTo = 21,
k_greaterEq = 22,
k_lessEq = 23,
k_notEq = 24,
k_less = 25,
k_greater = 26,
k_assign = 27,
k_comma = 28,
k_colon = 29,
k_leftParen = 30,
k_rightParen = 31,
k_leftBracket = 32,
k_rightBracket = 33,
k_alphaTerminal = 34,
k_numericTerminal = 35
};

template <typename Lexer>
struct LexerTokens : lex::lexer<Lexer>
{
LexerTokens() :
whiteSpace("[ \\t\\n]"),
andTok("and"),
def("def"),
elihw("elihw"),
elseTok("else"),
falseTok("false"),
fed("fed"),
fi("fi"),
ifTok("if"),
input("input"),
notTok("not"),
orTok("or"),
print("print"),
returnTok("return"),
trueTok("true"),
whileTok("while"),
plus("\\+"),
minus("\\-"),
mult("\\*"),
div("\\/"),
bang("\\!"),
equalTo("=="),
greaterEq(">="),
lessEq("<="),
notEq("!="),
less("<"),
greater(">"),
assign("="),
comma(","),
colon(":"),
leftParen("\\("),
rightParen("\\)"),
leftBracket("\\["),
rightBracket("\\["),
alphaTerminal("[a-z][a-zA-Z0-9]*"),
numericTerminal("[0-9]*")
{
this->self("WHITESPACE") = whiteSpace;

this->self.add
(andTok, k_andTok)
(def, k_def)
(elihw, k_elihw)
(elseTok, k_elseTok)
(falseTok, k_falseTok)
(fed, k_fed)
(fi, k_fi)
(ifTok, k_ifTok)
(andTok, k_andTok)
(input, k_input)
(notTok, k_notTok)
(orTok, k_orTok)
(print, k_print)
(returnTok, k_returnTok)
(trueTok, k_trueTok)
(whileTok, k_whileTok)
(plus, k_plues)
(minus, k_minus)
(mult, k_mult)
(div, k_div)
(bang, k_bang)
(equalTo, k_equalTo)
(greaterEq, k_greaterEq)
(lessEq, k_lessEq)
(notEq, k_notEq)
(less, k_less)
(greater, k_greater)
(assign, k_assign)
(comma, k_comma)
(colon, k_colon)
(leftParen, k_leftParen)
(rightParen, k_rightParen)
(leftBracket, k_leftBracket)
(rightBracket, k_rightBracket)
(alphaTerminal, k_alphaTerminal)
(numericTerminal, k_numericTerminal);
}

lex::token_def<lex::omit> whiteSpace;
lex::token_def<std::string> andTok;
lex::token_def<std::string> def;
lex::token_def<std::string> elihw;
lex::token_def<std::string> elseTok;
lex::token_def<std::string> falseTok;
lex::token_def<std::string> fed;
lex::token_def<std::string> fi;
lex::token_def<std::string> ifTok;
lex::token_def<std::string> input;
lex::token_def<std::string> notTok;
lex::token_def<std::string> orTok;
lex::token_def<std::string> print;
lex::token_def<std::string> returnTok;
lex::token_def<std::string> trueTok;
lex::token_def<std::string> whileTok;
lex::token_def<std::string> plus;
lex::token_def<std::string> minus;
lex::token_def<std::string> mult;
lex::token_def<std::string> div;
lex::token_def<std::string> bang;
lex::token_def<std::string> equalTo;
lex::token_def<std::string> greaterEq;
lex::token_def<std::string> lessEq;
lex::token_def<std::string> notEq;
lex::token_def<std::string> less;
lex::token_def<std::string> greater;
lex::token_def<std::string> assign;
lex::token_def<std::string> comma;
lex::token_def<std::string> colon;
lex::token_def<std::string> leftParen;
lex::token_def<std::string> rightParen;
lex::token_def<std::string> leftBracket;
lex::token_def<std::string> rightBracket;
lex::token_def<std::string> alphaTerminal;
lex::token_def<std::string> numericTerminal;
};

这是我的示例测试程序

    int main(int argc, char** argv)
{
namespace lex = boost::spirit::lex;
namespace qi = boost::spirit::qi;

typedef lex::lexertl::token< char const*, lex::omit, boost::mpl::true_ > token_type;
typedef lex::lexertl::lexer<token_type> lexer_type;
typedef interpreter::LexerTokens<lexer_type>::iterator_type iterator_type;
typedef qi::in_state_skipper<interpreter::LexerTokens<lexer_type>::lexer_def> skipper_type;

interpreter::LexerTokens< lexer_type > lexer;
interpreter::InterpreterGrammar< iterator_type, skipper_type > parser(lexer);
std::string sourceCode("def print_it(x, y): print 3*x + y return fed print_it(8,1) x = 3 print_it(x, x)");

char const* first = sourceCode.c_str();
char const* last = &first[sourceCode.size()];
bool r = lex::tokenize_and_phrase_parse(first, last, lexer, parser, qi::in_state("WHITESPACE")[lexer.self]);

std::cout << "Remaining " << std::string(first,last) << std::endl;
std::cout << "R is " << r << std::endl;
}

这些修改引起了几个问题,第一,通过非正式地看文法,没有构造完整的LL(1)解析表,我不相信这个文法是LL(1)。我仍然需要验证这一点,但我想知道精神,能够解析这个语法吗?我知道 PEG 通常使用/运算符进行前瞻,精神目前是否这样做?我在另一篇文章中看到精神可能不会?

其次,这个语法失败了,我注意到在开始生产时,我简化了开始生产并使其成为:

start %= functionList;

然后将输入更改为:

def print_it(x, y): print 3*x + y return fed

语法调试语句指出解析成功。但是,我看到剩余的字符串是:

print_it(x, y): print 3*x + y return fed

所以实际上只解析了第一个标记。经过一些调试后,我不确定为什么解析成功以及为什么只消耗了一个符号,这可能是词法分析器的问题吗?

此外,当我将开始生产更改为:

start %= endList;

并使用输入

y = x

然而,这无法解析并且只消耗字符 y。

最后,我的调试语句的输出不是很有帮助,当使用调试语句运行时产生的输出是:

<start>
<try>[][][][][][][][][][][][][][][][][][][][]</try>
<fail/>
</start>
Remaining print_it(x, y): print 3*x + y return fed print_it(8,1) x = 3 print_it(x, x)
R is 0

我假设这意味着在语法中尝试了二十个产生式,来自二十个空 [],这是一个正确的假设吗?还有为什么 [] 是空的?我通常会看到它们带有一些对调试有用的文本。是不是因为匹配到正则表达式语法就自动成功了?如果是这种情况,有没有办法让调试语句在使用 token 枚举 token 而不是使用宏添加表达式时打印有用的输出?

如有任何帮助或指出正确方向,我们将不胜感激,谢谢。

最佳答案

Which I assume to mean twenty productions are attempted in the grammar, from the twenty empty [], is that a correct assumption?

没有。 [] 表示输入标记。

Also why are the [] empty?

可能没有有用的方法来打印它们,因此它们显示为空。

If that is the case, is there a way to get the debug statement to print helpful output when using the token enum token as opposed to adding expressions using macros?

我也这么认为。但我从不使用 Lex。所以,可能需要一段时间才能弄清楚。

首先引起我注意的是:

typedef lex::lexertl::token< char const*, lex::omit, boost::mpl::true_ > token_type;

您的 AttributeTypes 表示 omit。确实,改为

typedef lex::lexertl::token< char const*, boost::mpl::vector<lex::omit, std::string>, boost::mpl::true_ > token_type;

确实显示出生命迹象。对于输入 x=y(没有空格!)它打印:

Live On Coliru

<start>
<try>[y][=][x][][][][][][][][][][][][][][][][][]</try>
<fail/>
</start>
Remaining

R is false

现在,对于 def print_it(x, y): print 3*x + y return fed 输入,输出仍然是:

<start>
<try>[def][][][][][][][][][][][][][][][][][][][]</try>
<fail/>
</start>
Remaining print_it(x, y): print 3*x + y return fed

R is false

信息量稍大。有趣的是,它似乎在第一个空格上也失败了。 whiteSpace 正则表达式看起来没问题,所以我 searched for lex in_state in my answers记住我曾经学到的关于 skip 和 Lex 的知识。

我尝试了一些建议。在第二篇文章之后,我看到了这条评论:

Nice! You can add to your answer that having a separate state inside the lexer for skipping white spaces has another considerable drawback: lack of debugging support. If you use a separate state for skipping and then use BOOST_SPIRIT_DEBUG_NODE, you won't get the full output of the token stream. This is because the default debug handler advances the lexer iterator to get tokens, the lexer is of course in the INITIAL state. As soon as it meets a white space, the iteration stops, because the lexer cannot find a match. The token stream will be cut at the first white space in the rule's trace. – noxmetus Sep 20 '17 at 18:28

让我们,只是为了好玩,尝试不使用单独的状态,而是使用来自 Troubles with boost::spirit::lex & whitespace 的状态。 .

现在,表达式无法解析,因为

  • print_it 不是有效的标识符。让我们将 _ 添加到 alphaTerminal
  • alphaTerminal 需要 = 跟随。稍微修改一下表达式:

    exp = (
    qi::token(k_alphaTerminal)
    | qi::token(k_numericTerminal)
    | qi::token(k_trueTok)
    | qi::token(k_falseTok))
    >> expPrime
    ;
  • 事实上,这些 token ID 是无效的。您需要从 min_token_id(即 0x10000)开始:

    enum Tokens
    {
    k_andTok = lex::tokenids::min_token_id + 1,
    k_def, k_elihw, k_elseTok, k_falseTok, k_fed, k_fi, k_ifTok, k_input,
    k_notTok, k_orTok, k_print, k_returnTok, k_trueTok, k_whileTok,
    k_plues, k_minus, k_mult, k_div, k_bang, k_equalTo, k_greaterEq,
    k_lessEq, k_notEq, k_less, k_greater, k_assign, k_comma, k_colon,
    k_leftParen, k_rightParen, k_leftBracket, k_rightBracket,
    k_alphaTerminal, k_numericTerminal,
    };
  • 为什么您甚至想要指定 token ID?根据您正在将对 token 定义的引用传递给解析器的事实进行猜测,我认为您会使用它们?

    exp
    = (tok.alphaTerminal | tok.numericTerminal | tok.trueTok | tok.falseTok) >> expPrime
    ;
  • 此外,让我们为所有规则启用调试:

    BOOST_SPIRIT_DEBUG_NODES(
    (start) (functionList) (endList) (endListPrime) (param)
    (paramPrime) (paramList) (paramListPrime) (statements)
    (statementsPrime) (statement) (assignmentStatement)
    (printStatement) (inputStatement) (conditionStatement)
    (whileStatement) (callStatement) (returnStatement) (exp)
    (expPrime) (intList) (intListPrime) (optionalElse) (end)
    )

而且,在花了太多时间试图理解为什么状态切换船长似乎不起作用之后,我放弃了。这是我修补的结果:

Live On Coliru

//#define USE_STATES
#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/lex.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/phoenix.hpp>
namespace lex = boost::spirit::lex;

namespace interpreter {
enum Tokens
{
k_andTok = lex::tokenids::min_token_id + 1,
k_def, k_elihw, k_elseTok, k_falseTok, k_fed, k_fi, k_ifTok, k_input,
k_notTok, k_orTok, k_print, k_returnTok, k_trueTok, k_whileTok,
k_plues, k_minus, k_mult, k_div, k_bang, k_equalTo, k_greaterEq,
k_lessEq, k_notEq, k_less, k_greater, k_assign, k_comma, k_colon,
k_leftParen, k_rightParen, k_leftBracket, k_rightBracket,
k_alphaTerminal, k_numericTerminal,
};

template <typename Lexer>
struct LexerTokens : lex::lexer<Lexer>
{
LexerTokens() :
whiteSpace("[ \\t\\n]"),
andTok("and"),
def("def"),
elihw("elihw"),
elseTok("else"),
falseTok("false"),
fed("fed"),
fi("fi"),
ifTok("if"),
input("input"),
notTok("not"),
orTok("or"),
print("print"),
returnTok("return"),
trueTok("true"),
whileTok("while"),
plus("\\+"),
minus("\\-"),
mult("\\*"),
div("\\/"),
bang("\\!"),
equalTo("=="),
greaterEq(">="),
lessEq("<="),
notEq("!="),
less("<"),
greater(">"),
assign("="),
comma(","),
colon(":"),
leftParen("\\("),
rightParen("\\)"),
leftBracket("\\["),
rightBracket("\\["),
alphaTerminal("[a-z][a-zA-Z0-9_]*"),
numericTerminal("[0-9]*")
{
this->self.add
(andTok, k_andTok)
(def, k_def)
(elihw, k_elihw)
(elseTok, k_elseTok)
(falseTok, k_falseTok)
(fed, k_fed)
(fi, k_fi)
(ifTok, k_ifTok)
(andTok, k_andTok)
(input, k_input)
(notTok, k_notTok)
(orTok, k_orTok)
(print, k_print)
(returnTok, k_returnTok)
(trueTok, k_trueTok)
(whileTok, k_whileTok)
(plus, k_plues)
(minus, k_minus)
(mult, k_mult)
(div, k_div)
(bang, k_bang)
(equalTo, k_equalTo)
(greaterEq, k_greaterEq)
(lessEq, k_lessEq)
(notEq, k_notEq)
(less, k_less)
(greater, k_greater)
(assign, k_assign)
(comma, k_comma)
(colon, k_colon)
(leftParen, k_leftParen)
(rightParen, k_rightParen)
(leftBracket, k_leftBracket)
(rightBracket, k_rightBracket)
(alphaTerminal, k_alphaTerminal)
(numericTerminal, k_numericTerminal);

#ifdef USE_STATES
this->self("WHITESPACE") = whiteSpace;
#else
this->self += whiteSpace [ lex::_pass = lex::pass_flags::pass_ignore ];
#endif
}

lex::token_def<lex::omit> whiteSpace;
lex::token_def<std::string> andTok;
lex::token_def<std::string> def;
lex::token_def<std::string> elihw;
lex::token_def<std::string> elseTok;
lex::token_def<std::string> falseTok;
lex::token_def<std::string> fed;
lex::token_def<std::string> fi;
lex::token_def<std::string> ifTok;
lex::token_def<std::string> input;
lex::token_def<std::string> notTok;
lex::token_def<std::string> orTok;
lex::token_def<std::string> print;
lex::token_def<std::string> returnTok;
lex::token_def<std::string> trueTok;
lex::token_def<std::string> whileTok;
lex::token_def<std::string> plus;
lex::token_def<std::string> minus;
lex::token_def<std::string> mult;
lex::token_def<std::string> div;
lex::token_def<std::string> bang;
lex::token_def<std::string> equalTo;
lex::token_def<std::string> greaterEq;
lex::token_def<std::string> lessEq;
lex::token_def<std::string> notEq;
lex::token_def<std::string> less;
lex::token_def<std::string> greater;
lex::token_def<std::string> assign;
lex::token_def<std::string> comma;
lex::token_def<std::string> colon;
lex::token_def<std::string> leftParen;
lex::token_def<std::string> rightParen;
lex::token_def<std::string> leftBracket;
lex::token_def<std::string> rightBracket;
lex::token_def<std::string> alphaTerminal;
lex::token_def<std::string> numericTerminal;
};

namespace qi = boost::spirit::qi;
template <typename Iterator, typename Skipper>
struct InterpreterGrammar : qi::grammar<Iterator, Skipper>
{
template <typename TokenDef>
InterpreterGrammar(TokenDef const& tok)
: InterpreterGrammar::base_type(start)
{
start
= functionList >> endList >> qi::eoi
;

// different expressions
exp
= (tok.alphaTerminal | tok.numericTerminal | tok.trueTok | tok.falseTok) >> expPrime
;

expPrime
= tok.equalTo >> exp >> expPrime
| tok.notEq >> exp >> expPrime
| tok.less >> exp >> expPrime
| tok.lessEq >> exp >> expPrime
| tok.greater >> exp >> expPrime
| tok.greaterEq >> exp >> expPrime
| tok.andTok >> exp >> expPrime
| tok.orTok >> exp >> expPrime
| tok.notTok >> exp
| tok.plus >> exp >> expPrime
| tok.minus >> exp >> expPrime
| tok.mult >> exp >> expPrime
| tok.minus >> exp
| tok.leftParen >> exp >> tok.rightParen
| tok.alphaTerminal >> tok.leftBracket >> exp >> tok.rightBracket
| tok.alphaTerminal >> tok.leftParen >> tok.rightParen
| tok.alphaTerminal >> tok.leftParen >> exp >> tok.rightParen
| qi::eps
;

// parameter list
paramList
= exp >> paramListPrime
;

paramListPrime
= tok.comma >> exp >> paramListPrime
| qi::eps
;

// return statements
returnStatement
= tok.returnTok >> exp
| tok.returnTok
;

// function call statements
callStatement
= tok.alphaTerminal >> tok.leftParen >> tok.rightParen
| tok.alphaTerminal >> tok.leftParen >> paramList >> tok.rightParen
;

// variable assignment
assignmentStatement
= tok.alphaTerminal >> tok.assign >> exp
| tok.alphaTerminal >> tok.leftBracket >> exp
>> tok.rightBracket >> tok.assign >> exp
;

// list of integers
intList
= tok.numericTerminal >> intListPrime
;

intListPrime
= tok.comma >> tok.numericTerminal >> intListPrime
| qi::eps
;

// print out a variable
printStatement
= tok.print >> exp
;

// take input
inputStatement
= tok.alphaTerminal >> tok.input
;

// conditional statement
conditionStatement
= tok.ifTok >> exp >> tok.colon >> statements >> optionalElse
;

// consitions have optional else
optionalElse
= tok.elseTok >> tok.colon >> statements
| qi::eps
;

// while loop
whileStatement
= tok.whileTok >> exp >> tok.colon >> statements >> tok.elihw
;

// actual program statements
endList
= end >> endListPrime
;

endListPrime
= end >> endListPrime
| qi::eps
;

// end possibilities of program in global space
end
= callStatement
| printStatement
| tok.alphaTerminal >> tok.assign >> tok.input
| tok.alphaTerminal >> tok.assign >> exp
| tok.alphaTerminal >> tok.assign >> tok.leftBracket >> intList
>> tok.rightBracket
| tok.alphaTerminal >> tok.leftBracket >> exp >> tok.rightBracket
>> tok.assign >> exp
;

// function parameters
param
= tok.alphaTerminal >> paramPrime
| tok.alphaTerminal >> tok.leftBracket >> tok.rightBracket
>> paramPrime
;

// for handling left recursion in paramlist
paramPrime
= tok.comma >> tok.alphaTerminal >> paramPrime
| qi::eps
;

// define a statement as assignment print input condition while or call
statement
= assignmentStatement
| printStatement
| inputStatement
| conditionStatement
| whileStatement
| callStatement
| returnStatement
;

// general statement list
statements
= statement >> statementsPrime
;

// for handling left recursion in statements
statementsPrime
= statement >> statementsPrime
| qi::eps
;

// functions
functionList
= tok.def >> tok.alphaTerminal >> tok.leftParen
>> param >> tok.rightParen >> tok.colon
>> statements >> tok.fed
| tok.def >> tok.alphaTerminal >> tok.leftParen
>> tok.rightParen >> tok.colon >> statements >> tok.fed
| qi::eps
;

BOOST_SPIRIT_DEBUG_NODES(
(start) (functionList) (endList) (endListPrime) (param)
(paramPrime) (paramList) (paramListPrime) (statements)
(statementsPrime) (statement) (assignmentStatement)
(printStatement) (inputStatement) (conditionStatement)
(whileStatement) (callStatement) (returnStatement) (exp)
(expPrime) (intList) (intListPrime) (optionalElse) (end)
)

}
private:
qi::rule<Iterator, Skipper> start;
qi::rule<Iterator, Skipper> functionList;
qi::rule<Iterator, Skipper> endList;
qi::rule<Iterator, Skipper> endListPrime;
qi::rule<Iterator, Skipper> param;
qi::rule<Iterator, Skipper> paramPrime;
qi::rule<Iterator, Skipper> paramList;
qi::rule<Iterator, Skipper> paramListPrime;
qi::rule<Iterator, Skipper> statements;
qi::rule<Iterator, Skipper> statementsPrime;
qi::rule<Iterator, Skipper> statement;
qi::rule<Iterator, Skipper> assignmentStatement;
qi::rule<Iterator, Skipper> printStatement;
qi::rule<Iterator, Skipper> inputStatement;
qi::rule<Iterator, Skipper> conditionStatement;
qi::rule<Iterator, Skipper> whileStatement;
qi::rule<Iterator, Skipper> callStatement;
qi::rule<Iterator, Skipper> returnStatement;
qi::rule<Iterator, Skipper> exp;
qi::rule<Iterator, Skipper> expPrime;
qi::rule<Iterator, Skipper> intList;
qi::rule<Iterator, Skipper> intListPrime;
qi::rule<Iterator, Skipper> optionalElse;
qi::rule<Iterator, Skipper> end;
};
}

#include <fstream>
#include <iterator>

int main(int argc, char** argv) {
namespace lex = boost::spirit::lex;
namespace qi = boost::spirit::qi;

typedef lex::lexertl::token<char const*, boost::mpl::vector<lex::omit, std::string>, boost::mpl::true_> token_type;
#ifdef USE_STATES
typedef lex::lexertl::actor_lexer<token_type> lexer_type;
typedef qi::in_state_skipper<interpreter::LexerTokens<lexer_type>::lexer_def> skipper_type;
typedef interpreter::LexerTokens<lexer_type>::iterator_type iterator_type;
#else
typedef lex::lexertl::actor_lexer<token_type> lexer_type;
typedef interpreter::LexerTokens<lexer_type>::iterator_type iterator_type;
typedef qi::unused_type skipper_type;
#endif

interpreter::LexerTokens<lexer_type> lexer;
interpreter::InterpreterGrammar<iterator_type, skipper_type> parser(lexer);

// read the file
if (argc != 2)
{
std::cout << "File required" << std::endl;
return 1;
}

std::ifstream t(argv[1]);
std::string const sourceCode { std::istreambuf_iterator<char>(t), {} };

char const* first = sourceCode.data();
char const* last = first + sourceCode.size();
#ifdef USE_STATES
bool ok = lex::tokenize_and_phrase_parse(first, last, lexer, parser, qi::in_state("WHITESPACE")[lexer.self]);
#else
bool ok = lex::tokenize_and_parse(first, last, lexer, parser);
#endif

std::cout << "Remaining '" << std::string(first,last) << "'" << std::endl;
std::cout << "R is " << std::boolalpha << ok << std::endl;
}

打印

<start>
<try>[def][print_it][(][x][,][y][)][:][print][3][*][x][+][y][return][fed]</try>
<functionList>
<try>[def][print_it][(][x][,][y][)][:][print][3][*][x][+][y][return][fed]</try>
<param>
<try>[x][,][y][)][:][print][3][*][x][+][y][return][fed]</try>
<paramPrime>
<try>[,][y][)][:][print][3][*][x][+][y][return][fed]</try>
<paramPrime>
<try>[)][:][print][3][*][x][+][y][return][fed]</try>
<success>[)][:][print][3][*][x][+][y][return][fed]</success>
<attributes>[]</attributes>
</paramPrime>
<success>[)][:][print][3][*][x][+][y][return][fed]</success>
<attributes>[]</attributes>
</paramPrime>
<success>[)][:][print][3][*][x][+][y][return][fed]</success>
<attributes>[]</attributes>
</param>
<statements>
<try>[print][3][*][x][+][y][return][fed]</try>
<statement>
<try>[print][3][*][x][+][y][return][fed]</try>
<assignmentStatement>
<try>[print][3][*][x][+][y][return][fed]</try>
<fail/>
</assignmentStatement>
<printStatement>
<try>[print][3][*][x][+][y][return][fed]</try>
<exp>
<try>[3][*][x][+][y][return][fed]</try>
<expPrime>
<try>[*][x][+][y][return][fed]</try>
<exp>
<try>[x][+][y][return][fed]</try>
<expPrime>
<try>[+][y][return][fed]</try>
<exp>
<try>[y][return][fed]</try>
<expPrime>
<try>[return][fed]</try>
<success>[return][fed]</success>
<attributes>[]</attributes>
</expPrime>
<success>[return][fed]</success>
<attributes>[]</attributes>
</exp>
<expPrime>
<try>[return][fed]</try>
<success>[return][fed]</success>
<attributes>[]</attributes>
</expPrime>
<success>[return][fed]</success>
<attributes>[]</attributes>
</expPrime>
<success>[return][fed]</success>
<attributes>[]</attributes>
</exp>
<expPrime>
<try>[return][fed]</try>
<success>[return][fed]</success>
<attributes>[]</attributes>
</expPrime>
<success>[return][fed]</success>
<attributes>[]</attributes>
</expPrime>
<success>[return][fed]</success>
<attributes>[]</attributes>
</exp>
<success>[return][fed]</success>
<attributes>[]</attributes>
</printStatement>
<success>[return][fed]</success>
<attributes>[]</attributes>
</statement>
<statementsPrime>
<try>[return][fed]</try>
<statement>
<try>[return][fed]</try>
<assignmentStatement>
<try>[return][fed]</try>
<fail/>
</assignmentStatement>
<printStatement>
<try>[return][fed]</try>
<fail/>
</printStatement>
<inputStatement>
<try>[return][fed]</try>
<fail/>
</inputStatement>
<conditionStatement>
<try>[return][fed]</try>
<fail/>
</conditionStatement>
<whileStatement>
<try>[return][fed]</try>
<fail/>
</whileStatement>
<callStatement>
<try>[return][fed]</try>
<fail/>
</callStatement>
<returnStatement>
<try>[return][fed]</try>
<exp>
<try>[fed]</try>
<fail/>
</exp>
<success>[fed]</success>
<attributes>[]</attributes>
</returnStatement>
<success>[fed]</success>
<attributes>[]</attributes>
</statement>
<statementsPrime>
<try>[fed]</try>
<statement>
<try>[fed]</try>
<assignmentStatement>
<try>[fed]</try>
<fail/>
</assignmentStatement>
<printStatement>
<try>[fed]</try>
<fail/>
</printStatement>
<inputStatement>
<try>[fed]</try>
<fail/>
</inputStatement>
<conditionStatement>
<try>[fed]</try>
<fail/>
</conditionStatement>
<whileStatement>
<try>[fed]</try>
<fail/>
</whileStatement>
<callStatement>
<try>[fed]</try>
<fail/>
</callStatement>
<returnStatement>
<try>[fed]</try>
<fail/>
</returnStatement>
<fail/>
</statement>
<success>[fed]</success>
<attributes>[]</attributes>
</statementsPrime>
<success>[fed]</success>
<attributes>[]</attributes>
</statementsPrime>
<success>[fed]</success>
<attributes>[]</attributes>
</statements>
<success></success>
<attributes>[]</attributes>
</functionList>
<endList>
<try></try>
<end>
<try></try>
<callStatement>
<try></try>
<fail/>
</callStatement>
<printStatement>
<try></try>
<fail/>
</printStatement>
<fail/>
</end>
<fail/>
</endList>
<fail/>
</start>
Remaining ''
R is false

关于c++ - Spirit 在仅出现从 Lexer 获得第一个符号后无法解析,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/50980148/

31 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com