gpt4 book ai didi

c++ - Boost Spirit 和 Lex 解析器问题

转载 作者:塔克拉玛干 更新时间:2023-11-02 23:10:28 25 4
gpt4 key购买 nike

我一直在努力尝试(逐步)修改文档中的示例代码,但没有太大的不同,我没有得到我期望的行为。具体来说,“if”语句在(我的意图是)它应该通过时失败(有一个“else”但解析器的那部分在调试期间被删除)。赋值语句工作正常。我也有一个“while”语句,它与“if”语句有同样的问题,所以我确信如果我能得到帮助来弄清楚为什么一个不工作,那么让另一个继续工作应该很容易。它一定有点微妙,因为这几乎是其中一个示例中的逐字记录。

#include <iostream>
#include <fstream>
#include <string>

#define BOOST_SPIRIT_DEBUG
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_statement.hpp>
#include <boost/spirit/include/phoenix_container.hpp>

namespace qi = boost::spirit::qi;
namespace lex = boost::spirit::lex;

inline std::string read_from_file( const char* infile )
{
std::ifstream instream( infile );
if( !instream.is_open() )
{
std::cerr << "Could not open file: \"" << infile << "\"" << std::endl;
exit( -1 );
}
instream.unsetf( std::ios::skipws );
return( std::string(
std::istreambuf_iterator< char >( instream.rdbuf() ),
std::istreambuf_iterator< char >()
) );
}

template< typename Lexer >
struct LangLexer : lex::lexer< Lexer >
{
LangLexer()
{
identifier = "[a-zA-Z][a-zA-Z0-9_]*";
number = "[-+]?(\\d*\\.)?\\d+([eE][-+]?\\d+)?";

if_ = "if";
else_ = "else";

this->self = lex::token_def<> ( '(' ) | ')' | '{' | '}' | '=' | ';';
this->self += identifier | number | if_ | else_;

this->self( "WS" ) = lex::token_def<>( "[ \\t\\n]+" );

}

lex::token_def<> if_, else_;
lex::token_def< std::string > identifier;
lex::token_def< double > number;
};

template< typename Iterator, typename Lexer >
struct LangGrammar : qi::grammar< Iterator, qi::in_state_skipper< Lexer > >
{
template< typename TokenDef >
LangGrammar( const TokenDef& tok ) : LangGrammar::base_type( program )
{
using boost::phoenix::val;
using boost::phoenix::ref;
using boost::phoenix::size;

program = +block;
block = '{' >> *statement >> '}';
statement = assignment | if_stmt;
assignment = ( tok.identifier >> '=' >> expression >> ';' );
if_stmt = ( tok.if_ >> '(' >> expression >> ')' >> block );
expression = ( tok.identifier[ qi::_val = qi::_1 ] | tok.number[ qi::_val = qi::_1 ] );

BOOST_SPIRIT_DEBUG_NODE( program );
BOOST_SPIRIT_DEBUG_NODE( block );
BOOST_SPIRIT_DEBUG_NODE( statement );
BOOST_SPIRIT_DEBUG_NODE( assignment );
BOOST_SPIRIT_DEBUG_NODE( if_stmt );
BOOST_SPIRIT_DEBUG_NODE( expression );
}

qi::rule< Iterator, qi::in_state_skipper< Lexer > > program, block, statement;
qi::rule< Iterator, qi::in_state_skipper< Lexer > > assignment, if_stmt;

typedef boost::variant< double, std::string > expression_type;
qi::rule< Iterator, expression_type(), qi::in_state_skipper< Lexer > > expression;
};

int main( int argc, char** argv )
{
typedef std::string::iterator base_iterator_type;
typedef lex::lexertl::token< base_iterator_type, boost::mpl::vector< double, std::string > > token_type;
typedef lex::lexertl::lexer< token_type > lexer_type;
typedef LangLexer< lexer_type > LangLexer;
typedef LangLexer::iterator_type iterator_type;
typedef LangGrammar< iterator_type, LangLexer::lexer_def > LangGrammar;

LangLexer lexer;
LangGrammar grammar( lexer );

std::string str( read_from_file( 1 == argc ? "boostLexTest.dat" : argv[1] ) );

base_iterator_type strBegin = str.begin();
iterator_type tokenItor = lexer.begin( strBegin, str.end() );
iterator_type tokenItorEnd = lexer.end();

std::cout << std::setfill( '*' ) << std::setw(20) << '*' << std::endl <<
str
<< std::endl << std::setfill( '*' ) << std::setw(20) << '*' << std::endl;

bool result = qi::phrase_parse( tokenItor, tokenItorEnd, grammar, qi::in_state( "WS" )[ lexer.self ] );

if( result )
{
std::cout << "Parsing successful" << std::endl;
}
else
{
std::cout << "Parsing error" << std::endl;
}

return( 0 );
}

这是运行它的输出(读入字符串的文件首先在 main 中转储)

********************
{
a = 5;
if( a ){ b = 2; }
}


********************
<program>
<try>{</try>
<block>
<try>{</try>
<statement>
<try></try>
<assignment>
<try></try>
<expression>
<try></try>
<success>;</success>
<attributes>(5)</attributes>
</expression>
<success></success>
<attributes>()</attributes>
</assignment>
<success></success>
<attributes>()</attributes>
</statement>
<statement>
<try></try>
<assignment>
<try></try>
<fail/>
</assignment>
<if_stmt>
<try>
if(</try>
<fail/>
</if_stmt>
<fail/>
</statement>
<fail/>
</block>
<fail/>
</program>
Parsing error

最佳答案

问题在于您将标记定义添加到词法分析器的顺序。你的代码

this->self += identifier | number | if_ | else_; 

首先添加 identifier 标记,它也将完美匹配“if”(以及任何其他关键字)。如果你把它改成

this->self += if_ | else_ | identifier | number; 

一切都开始正常工作。

这不是 Spirit.Lex 特有的。任何分词器都遵循定义分词的顺序来确定匹配的优先级。

关于c++ - Boost Spirit 和 Lex 解析器问题,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/2760812/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com