c++ - 如何结合 boost::spirit::lex 和 boost::spirit::qi？-6ren

c++ - 如何结合 boost::spirit::lex 和 boost::spirit::qi？

转载作者：太空狗更新时间：2023-10-29 20:28:22

我有一个词法分析器，现在想基于它创建一个语法，使用该词法分析器生成的标记。我尝试改编了找到的一些示例，现在得到了一份可以编译、并且至少能部分工作的代码，但我的一个本应失败的测试却没有失败。我想知道这是为什么，也想弄清楚我在那里实际做了什么(我想真正理解它——我只是从一些示例中复制了代码，但这并不能真正增进理解)。

词法分析器:

#include <boost/spirit/include/lex_lexertl.hpp>

namespace lex = boost::spirit::lex;

// Token identifiers shared between the lexer definition and the grammar.
//
// Numbering deliberately starts at 1: Spirit.Lex reserves the value 0 (a
// zero id passed to self.add() means "assign an id automatically", and the
// underlying lexertl engine uses 0 for end of input), so a token registered
// with id 0 could never be matched through qi::token(ID_...).
enum LexerIDs
{
    ID_IDENTIFIER = 1,
    ID_WHITESPACE,
    ID_INTEGER,
    ID_FLOAT,
    ID_PUNCTUATOR
};

/// Lexer definition that produces the tokens consumed by the grammar.
///
/// The default lexer state recognises identifiers, decimal and hexadecimal
/// integers, floating-point literals and punctuators. White space is
/// registered only in the separate "WS" state, so a parser can use that
/// state as its skipper.
///
/// @tparam Lexer  Concrete lexer type, forwarded to lex::lexer<>.
template <typename Lexer>
struct custom_lexer : lex::lexer<Lexer>
{
    custom_lexer()
        : identifier("[a-zA-Z_][a-zA-Z0-9_]*")
        , white_space("[ \\t\\n]+")
        // A lone "0" is a valid decimal literal; without the first
        // alternative it matched none of the numeric patterns.
        , integer_value("0|[1-9][0-9]*")
        , hex_value("0[xX][0-9a-fA-F]+")
        // Floats with digits after the dot (".5", "1.5e3") ...
        , float_value("[0-9]*\\.[0-9]+([eE][+-]?[0-9]+)?")
        // ... and floats that end in the dot ("1.", "1.e3").
        , float_value2("[0-9]+\\.([eE][+-]?[0-9]+)?")
        // [ ] ( ) . &> ** * + - ~ ! / % << >> < > <= >= == != ^ & | ^^ && || ? : ,
        , punctuator("\\[|\\]|\\(|\\)|\\.|&>|\\*\\*|\\*|\\+|-|~|!|\\/|%|<<|>>|<|>|<=|>=|==|!=|\\^|&|\\||\\^\\^|&&|\\|\\||\\?|:|,")
    {
        // Tokens of the default state. Decimal and hex literals share
        // ID_INTEGER, and both float spellings share ID_FLOAT.
        this->self.add
            (identifier, ID_IDENTIFIER)
            (integer_value, ID_INTEGER)
            (hex_value, ID_INTEGER)
            (float_value, ID_FLOAT)
            (float_value2, ID_FLOAT)
            (punctuator, ID_PUNCTUATOR);

        // White space is kept out of the default state on purpose: it is
        // only matched while the lexer is in state "WS".
        this->self("WS") = white_space;
    }

    lex::token_def<std::string> identifier;    ///< Identifier: letter or '_' followed by letters, digits or '_'.
    lex::token_def<lex::omit> white_space;     ///< Run of blanks, tabs or newlines; carries no attribute.
    lex::token_def<int> integer_value;         ///< Decimal integer literal.
    lex::token_def<int> hex_value;             ///< Hexadecimal integer literal with 0x/0X prefix.
    lex::token_def<double> float_value;        ///< Float literal with digits after the decimal point.
    lex::token_def<double> float_value2;       ///< Float literal ending in the decimal point.
    lex::token_def<> punctuator;               ///< Operators and punctuation.
};

语法:

namespace qi  = boost::spirit::qi;
namespace lex = boost::spirit::lex;

/// Grammar whose start rule accepts a single numeric token: either an
/// ID_INTEGER token or an ID_FLOAT token.
///
/// @tparam Iterator  Token iterator type the grammar parses over.
/// @tparam Lexer     Lexer definition type; skipping is done by a
///                   qi::in_state_skipper built on it.
template <typename Iterator, typename Lexer>
struct custom_grammar
    : qi::grammar<Iterator, qi::in_state_skipper<Lexer> >
{
    /// @param tok  Token definitions of the lexer; not referenced by the
    ///             current rule, which matches by token id only.
    template <typename TokenDef>
    custom_grammar(const TokenDef& tok)
        : custom_grammar::base_type(ges)
    {
        // Start rule: one integer token, otherwise one float token.
        ges = qi::token(ID_INTEGER)
            | qi::token(ID_FLOAT);

        // Name the rule for tracing and switch its debug output on.
        BOOST_SPIRIT_DEBUG_NODE(ges);
        debug(ges);
    }

    /// Start rule of the grammar.
    qi::rule<Iterator, qi::in_state_skipper<Lexer> > ges;
};

例子:

/// Lexes "1234 56" and parses the token stream with custom_grammar, using
/// the lexer's "WS" state as skipper.
///
/// The grammar accepts a single numeric token, so the parse itself succeeds
/// but stops before "56". The test therefore checks both the parse result
/// and that the whole input was consumed.
BOOST_AUTO_TEST_CASE(BasicGrammar)
{
    namespace lex = boost::spirit::lex;
    namespace qi = boost::spirit::qi;

    std::string test("1234 56");

    typedef lex::lexertl::token<char const*, lex::omit, boost::mpl::true_> token_type;
    typedef lex::lexertl::lexer<token_type> lexer_type;

    typedef custom_lexer<lexer_type>::iterator_type iterator_type;

    custom_lexer<lexer_type> my_lexer;
    custom_grammar<iterator_type, custom_lexer<lexer_type>::lexer_def> my_grammar(my_lexer);

    // Character range handed to the lexer.
    char const* first = test.c_str();
    char const* last = first + test.size();

    // Token iterators over that range.
    lexer_type::iterator_type iter = my_lexer.begin(first, last);
    lexer_type::iterator_type end = my_lexer.end();

    bool r = qi::phrase_parse(iter, end, my_grammar, qi::in_state("WS")[my_lexer.self]);

    BOOST_CHECK(r);
    // phrase_parse reports success once the grammar has matched a prefix of
    // the token stream; it takes `iter` by reference and leaves it where
    // parsing stopped. Only `iter == end` proves that nothing was left over.
    BOOST_CHECK(iter == end);
}

我的假设是这会返回 true，因为空格被跳过了——这是由于 qi::in_state("WS") 的缘故。真的是这样吗？此外，我知道如何为空白输出额外的标记——但我不知道该在 qi::in_state 现在所在的位置放什么——没有它就无法工作。

有什么我可以改进结构的想法吗？为什么调试输出如此有趣？

<ges>
  <try>[]</try>
  <success></success>
  <attributes>[]</attributes>
</ges>

感谢您的帮助。

问候

托比亚斯

最佳答案

您的解析器没有失败，但它也不是“默默地”跳过空格(无论如何它只解析一个非空格标记)。

事实上，*phrase_parse 系列 Spirit API 的一个属性是它可能无法匹配完整的输入。事实上，这就是它通过引用获取第一个迭代器的原因:在解析之后，迭代器将指示解析停止的位置。

我做了一些改动，因此您可以通过在 lexer_tokens 上使用 lex::tokenize_and_phrase_parse 而不是 qi::phrase_parse 轻松访问源迭代器:

// Raw character range over the input. `first` is passed by reference and is
// used afterwards to show what was left unparsed.
Iterator first = test.c_str();
Iterator last  = first + test.size();

// Lex and parse in one step, skipping whatever the lexer's "WS" state matches.
bool r = lex::tokenize_and_phrase_parse(
    first, last,
    my_lexer, my_grammar,
    qi::in_state("WS")[my_lexer.self]);

std::cout << std::boolalpha << r << "\n";
std::cout << "Remaining unparsed: '" << std::string(first, last) << "'\n";

输出是:

true
Remaining unparsed: '56'

这是一个完整的工作示例(注意我还将语法类的第二个参数直接更改为 Skipper，这对于 Spirit 语法来说更为典型):

#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>

namespace qi  = boost::spirit::qi;
namespace lex = boost::spirit::lex;

// Token identifiers shared between the lexer definition and the grammar.
//
// Numbering deliberately starts at 1: Spirit.Lex reserves the value 0 (a
// zero id passed to self.add() means "assign an id automatically", and the
// underlying lexertl engine uses 0 for end of input), so a token registered
// with id 0 could never be matched through qi::token(ID_...).
enum LexerIDs
{
    ID_IDENTIFIER = 1,
    ID_WHITESPACE,
    ID_INTEGER,
    ID_FLOAT,
    ID_PUNCTUATOR
};

/// Lexer definition that produces the tokens consumed by the grammar.
///
/// The default lexer state recognises identifiers, decimal and hexadecimal
/// integers, floating-point literals and punctuators. White space is
/// registered only in the separate "WS" state, so a parser can use that
/// state as its skipper.
///
/// @tparam Lexer  Concrete lexer type, forwarded to lex::lexer<>.
template <typename Lexer>
struct custom_lexer : lex::lexer<Lexer>
{
    custom_lexer()
        : identifier    ("[a-zA-Z_][a-zA-Z0-9_]*")
        , white_space   ("[ \\t\\n]+")
        // A lone "0" is a valid decimal literal; without the first
        // alternative it matched none of the numeric patterns.
        , integer_value ("0|[1-9][0-9]*")
        , hex_value     ("0[xX][0-9a-fA-F]+")
        // Floats with digits after the dot (".5", "1.5e3") ...
        , float_value   ("[0-9]*\\.[0-9]+([eE][+-]?[0-9]+)?")
        // ... and floats that end in the dot ("1.", "1.e3").
        , float_value2  ("[0-9]+\\.([eE][+-]?[0-9]+)?")
        // [ ] ( ) . &> ** * + - ~ ! / % << >> < > <= >= == != ^ & | ^^ && || ? : ,
        , punctuator    ("\\[|\\]|\\(|\\)|\\.|&>|\\*\\*|\\*|\\+|-|~|!|\\/|%|<<|>>|<|>|<=|>=|==|!=|\\^|&|\\||\\^\\^|&&|\\|\\||\\?|:|,")
    {
        // Tokens of the default state. Decimal and hex literals share
        // ID_INTEGER, and both float spellings share ID_FLOAT.
        this->self.add
            (identifier   , ID_IDENTIFIER)
            (integer_value, ID_INTEGER)
            (hex_value    , ID_INTEGER)
            (float_value  , ID_FLOAT)
            (float_value2 , ID_FLOAT)
            (punctuator   , ID_PUNCTUATOR);

        // White space is kept out of the default state on purpose: it is
        // only matched while the lexer is in state "WS".
        this->self("WS") = white_space;
    }

    lex::token_def<std::string> identifier;     ///< Identifier: letter or '_' followed by letters, digits or '_'.
    lex::token_def<lex::omit>   white_space;    ///< Run of blanks, tabs or newlines; carries no attribute.
    lex::token_def<int>         integer_value;  ///< Decimal integer literal.
    lex::token_def<int>         hex_value;      ///< Hexadecimal integer literal with 0x/0X prefix.
    lex::token_def<double>      float_value;    ///< Float literal with digits after the decimal point.
    lex::token_def<double>      float_value2;   ///< Float literal ending in the decimal point.
    lex::token_def<>            punctuator;     ///< Operators and punctuation.
};

/// Grammar whose start rule accepts a single numeric token: either an
/// ID_INTEGER token or an ID_FLOAT token.
///
/// @tparam Iterator  Token iterator type the grammar parses over.
/// @tparam Skipper   Skip-parser type applied between tokens.
template <typename Iterator, typename Skipper>
struct custom_grammar
    : qi::grammar<Iterator, Skipper>
{
    /// @param tok  Token definitions of the lexer; not referenced by the
    ///             current rule, which matches by token id only.
    template <typename TokenDef>
    custom_grammar(const TokenDef& tok)
        : custom_grammar::base_type(ges)
    {
        // Start rule: one integer token, otherwise one float token.
        ges = qi::token(ID_INTEGER)
            | qi::token(ID_FLOAT);

        BOOST_SPIRIT_DEBUG_NODE(ges);
    }

    /// Start rule of the grammar.
    qi::rule<Iterator, Skipper> ges;
};

int main()
{
    std::string test("1234 56");

    typedef char const* Iterator;
    typedef lex::lexertl::token<Iterator, lex::omit, boost::mpl::true_> token_type;
    typedef lex::lexertl::lexer<token_type> lexer_type;
    typedef qi::in_state_skipper<custom_lexer<lexer_type>::lexer_def> skipper_type;

    typedef custom_lexer<lexer_type>::iterator_type iterator_type;

    custom_lexer<lexer_type> my_lexer; 
    custom_grammar<iterator_type, skipper_type> my_grammar(my_lexer);

    Iterator first = test.c_str();
    Iterator last = &first[test.size()];

    bool r = lex::tokenize_and_phrase_parse(first,last,my_lexer,my_grammar,qi::in_state( "WS" )[ my_lexer.self ]);

    std::cout << std::boolalpha << r << "\n";
    std::cout << "Remaining unparsed: '" << std::string(first,last) << "'\n";
}

关于c++ - 如何结合 boost::spirit::lex 和 boost::spirit::qi？，我们在Stack Overflow上找到一个类似的问题： https://stackoverflow.com/questions/13395453/

文章推荐： c# - MVP，Winforms - 事件处理程序或委托(delegate)

文章推荐： python - 在Python/Pandas中创建部分SAS PROC Summary替换

文章推荐： c# - 如何更改 PropertyGrid 控件的边框颜色(或删除边框)？

文章推荐： python - 循环引用有什么用？

boost - boost boost::spirit::qi以使用STL容器
我正在尝试使用boost.spirit的qi库解析某些内容，而我遇到了一个问题。根据spirit docs，a >> b应该产生类型为tuple的东西。但这是boost::tuple(又名 fusio
boost - 在 CMake 中轻松使用 Boost，无需安装 Boost(Boost CMake 模块化)
似乎有/正在努力做到这一点，但到目前为止我看到的大多数资源要么已经过时(带有死链接)，要么几乎没有信息来实际构建一个小的工作样本(例如，依赖于boost program_options 以构建可执行文
boost - boost.log 是 Boost 的正式一部分吗？
我对 Boost.Log 的状态有点困惑。这是 Boost 的官方部分，还是尚未被接受？当我用谷歌搜索时，我看到一些帖子谈论它在 2010 年是如何被接受的，等等，但是当我查看最后一个 Boost 库
boost - boost::string_ref 和 boost::string_view 的区别
Boost 提供了两种不同的实现 string_view ，这将成为 C++17 的一部分: boost::string_ref在 utility/string_ref.hpp boost::stri
boost - Boost.Geometry是否足够成熟？
最近，我被一家GIS公司雇用来重写他们的旧地理信息库。所以我目前正在寻找一个好的计算几何库。我看过CGAL，这真是了不起，但是我的老板想要免费的东西。所以我现在正在检查Boost.Geometry。
boost - 在图中添加和删除现有边(BOOST)？
假设我有一个无向图 G。假设我添加以下内容 add_edge(1,2,G); add_edge(1,3,G); add_edge(0,2,G); 现在我再说一遍: add_edge(0,2,G); 我
boost - CMake 找到 Boost，但导入的目标不适用于 Boost 版本
我使用 CMake 来查找 Boost。找到了 Boost，但 CMake 出错了 Imported targets not available for Boost version 请参阅下面的完整错
boost - boost::MPL 和 boost::fusion 之间的区别
我是 boost::fusion 和 boost::mpl 库的新手。谁能告诉我这两个库之间的主要区别？到目前为止，我只使用 fusion::vector 和其他一些简单的东西。现在我想使用 fus
boost - boost phoenix什么时候有用？
这个问题已经有答案了: 已关闭10 年前。 Possible Duplicate: What are the benefits of using Boost.Phoenix? 所以我开始阅读 boos
boost - 链接器错误 : Boost. Chrono 到 Boost.Timer
我正在尝试获得一个使用 Boost.Timer 的简单示例，用于一些秒表性能测量，但我不明白为什么我无法成功地将 Boost.Timer 链接到 Boost.Chrono。我使用以下简单脚本从源代码构
boost - C++ boost::shared_ptr & boost::weak_ptr & dynamic_cast
我有这样的东西: enum EFood{ eMeat, eFruit }; class Food{ }; class Meat: public Food{ void someM
boost - Boost::variant与无序映射
有人可以告诉我，我如何获得boost::Variant处理无序地图？ typedef boost::variant lut_value;unordered_map table; 我认为有一个用于boo
boost - boost 几何中的环和多边形有什么区别？
我对 Boost.Geometry 中的环和多边形感到困惑。在文档中，没有图形显示什么是环，什么是多边形。谁能画图解释两个概念的区别？最佳答案在 Boost.Geometry 中，多边形被定义
boost - boost::pool<>::malloc 和 boost::pool<>::ordered_malloc 有什么区别，什么时候应该使用 boost::pool<>::ordered_malloc？
我正在使用 boost.pool，但我不知道何时使用 boost::pool<>::malloc和 boost::pool<>::ordered_malloc ? 所以， boost::pool<>:
c++ - (Boost 库) - boost::container::flat_set with boost::fast_pool_allocator
我正在尝试通过 *boost::fast_pool_allocator* 使用 *boost::container::flat_set*。但是，我收到编译错误。非常感谢您的意见和建议。为了突出这个问题
c++ - boost::bind、boost::asio、boost::thread 和类
sau_timer::sau_timer(int secs, timerparam f) : strnd(io), t(io, boost::posix_time::seconds(secs)
boost - Boost.Graph 中的 boost::out_edges( v, g ) 有什么作用？
我无法理解此功能的文档，我已多次看到以下内容 tie (ei,ei_end) = out_edges(*(vi+a),g); **g**::out_edge_iterator ei, ei_end;
boost-propertytree - 我们如何在另一个 boost ptree 中插入一个 boost ptree 作为节点？
我想在 C++ 中序列化分层数据结构。我正在处理的项目使用 boost，所以我使用 boost::property_tree::ptree 作为我的数据节点结构。我们有像 Person 这样的高级结
c++ - boost::exception_detail::clone_impl>
我需要一些帮助来解决这个异常，我正在实现一个 NPAPI 插件，以便能够使用来自浏览器扩展的本地套接字，为此我正在使用 Firebreath 框架。对于套接字和连接，我使用带有异步调用的 Boost
c++ - boost::bind、boost::function 和 boost::factory 的问题
我尝试将 boost::bind 与 boost::factory 结合使用但没有成功我有这个类 Zambas 有 4 个参数(2 个字符串和 2 个整数)和 class Zambas { publ

太空狗

个人简介

我是一名优秀的程序员,十分优秀！

作者热门文章

滴滴打车优惠券免费领取

全站热门文章

首页

博学

6Ren·AI

商城

c++ - 如何结合 boost::spirit::lex 和 boost::spirit::qi？