gpt4 book ai didi

c++ - 使用boost精神qi处理gor解析bnf语法的多行规则

转载 作者:行者123 更新时间:2023-12-02 10:29:14 25 4
gpt4 key购买 nike

假设我有这样的 BNF 语法

<code>   ::=  <letter><digit> | <letter><digit><code>
<letter> ::= a | b | c | d | e
| f | g | h | i
<digit> ::= 0 | 1 | 2 | 3 |
4
如果您查看 <letter>规则,它的延续以 | 开始。但是 <digit>规则以 | 的产生式开始出现在上一行的末尾。我也不想使用特定的符号来表示规则的结束。
如何检查规则是否结束,使用增强灵气执行。
我刚刚浏览了 boost 页面上的教程,想知道我将如何处理这个问题。

最佳答案

Wikipedia

BNF syntax can only represent a rule in one line, whereas in EBNF a terminating character, the semicolon character “;” marks the end of a rule.


所以简单的答案是:输入不是 BNF。
如果您无论如何都想支持它(后果自负:)),您将不得不这样做。所以,让我们写一个简单的 BFN 语法,从字面上映射 Wikipedia BNF
<syntax>         ::= <rule> | <rule> <syntax>
<rule> ::= <opt-whitespace> "<" <rule-name> ">" <opt-whitespace> "::=" <opt-whitespace> <expression> <line-end>
<opt-whitespace> ::= " " <opt-whitespace> | ""
<expression> ::= <list> | <list> <opt-whitespace> "|" <opt-whitespace> <expression>
<line-end> ::= <opt-whitespace> <EOL> | <line-end> <line-end>
<list> ::= <term> | <term> <opt-whitespace> <list>
<term> ::= <literal> | "<" <rule-name> ">"
<literal> ::= '"' <text1> '"' | "'" <text2> "'"
<text1> ::= "" | <character1> <text1>
<text2> ::= '' | <character2> <text2>
<character> ::= <letter> | <digit> | <symbol>
<letter> ::= "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z" | "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z"
<digit> ::= "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
<symbol> ::= "|" | " " | "!" | "#" | "$" | "%" | "&" | "(" | ")" | "*" | "+" | "," | "-" | "." | "/" | ":" | ";" | ">" | "=" | "<" | "?" | "@" | "[" | "\" | "]" | "^" | "_" | "`" | "{" | "}" | "~"
<character1> ::= <character> | "'"
<character2> ::= <character> | '"'
<rule-name> ::= <letter> | <rule-name> <rule-char>
<rule-char> ::= <letter> | <digit> | "-"
它可能看起来像这样:
template <typename Iterator>
struct BNF: qi::grammar<Iterator, Ast::Syntax()> {
BNF(): BNF::base_type(start) {
using namespace qi;
start = skip(blank) [ _rule % +eol ];

_rule = _rule_name >> "::=" >> _expression;
_expression = _list % '|';
_list = +_term;
_term = _literal | _rule_name;
_literal = '"' >> *(_character - '"') >> '"'
| "'" >> *(_character - "'") >> "'";
_character = alnum | char_("\"'| !#$%&()*+,./:;>=<?@]\\^_`{}~[-");
_rule_name = '<' >> (alpha >> *(alnum | char_('-'))) >> '>';

BOOST_SPIRIT_DEBUG_NODES(
(_rule)(_expression)(_list)(_term)
(_literal)(_character)
(_rule_name))
}

private:
qi::rule<Iterator, Ast::Syntax()> start;
qi::rule<Iterator, Ast::Rule(), qi::blank_type> _rule;
qi::rule<Iterator, Ast::Expression(), qi::blank_type> _expression;
qi::rule<Iterator, Ast::List(), qi::blank_type> _list;
// lexemes
qi::rule<Iterator, Ast::Term()> _term;
qi::rule<Iterator, Ast::Name()> _rule_name;
qi::rule<Iterator, std::string()> _literal;
qi::rule<Iterator, char()> _character;
};
现在它将解析您的样本(更正为 BNF):
    std::string const input = R"(<code>   ::=  <letter><digit> | <letter><digit><code>
<letter> ::= "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i"
<digit> ::= "0" | "1" | "2" | "3" | "4"
)";
Live On Compiler Explorer
打印:
code ::= {<letter>, <digit>} | {<letter>, <digit>, <code>}
letter ::= {a} | {b} | {c} | {d} | {e} | {f} | {g} | {h} | {i}
digit ::= {0} | {1} | {2} | {3} | {4}
Remaining: "
"
支持换行规则
最好的方法是不接受它们 - 因为语法不是为它设计的,不像例如EBNF。
您可以通过在船长中进行负面预测来强制解决问题:
_skipper = blank | (eol >> !_rule);
start = skip(_skipper) [ _rule % +eol ];
由于技术原因( Boost spirit skipper issues )无法编译,因此我们需要在前瞻中为其提供一个占位符跳过:
_blank = blank;
_skipper = blank | (eol >> !skip(_blank.alias()) [ _rule ]);
start = skip(_skipper.alias()) [ _rule % +eol ];
现在它解析相同但有各种换行符:
    std::string const input = R"(<code>   ::=  <letter><digit> | <letter><digit><code>
<letter> ::= "a" | "b" | "c" | "d" | "e"
| "f" | "g" | "h" | "i"
<digit> ::= "0" | "1" | "2" | "3" |
"4"
)";
打印:
code ::= {<letter>, <digit>} | {<letter>, <digit>, <code>}
letter ::= {a} | {b} | {c} | {d} | {e} | {f} | {g} | {h} | {i}
digit ::= {0} | {1} | {2} | {3} | {4}
完整 list
Compiler Explorer
//#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/adapted.hpp>
#include <fmt/ranges.h>
#include <fmt/ostream.h>
#include <iomanip>
namespace qi = boost::spirit::qi;

namespace Ast {
struct Name : std::string {
using std::string::string;
using std::string::operator=;

friend std::ostream& operator<<(std::ostream& os, Name const& n) {
return os << '<' << n.c_str() << '>';
}
};

using Term = boost::variant<Name, std::string>;
using List = std::list<Term>;
using Expression = std::list<List>;

struct Rule {
Name name; // lhs
Expression rhs;
};

using Syntax = std::list<Rule>;
}

BOOST_FUSION_ADAPT_STRUCT(Ast::Rule, name, rhs)

namespace Parser {
template <typename Iterator>
struct BNF: qi::grammar<Iterator, Ast::Syntax()> {
BNF(): BNF::base_type(start) {
using namespace qi;
_blank = blank;
_skipper = blank | (eol >> !skip(_blank.alias()) [ _rule ]);
start = skip(_skipper.alias()) [ _rule % +eol ];

_rule = _rule_name >> "::=" >> _expression;
_expression = _list % '|';
_list = +_term;
_term = _literal | _rule_name;
_literal = '"' >> *(_character - '"') >> '"'
| "'" >> *(_character - "'") >> "'";
_character = alnum | char_("\"'| !#$%&()*+,./:;>=<?@]\\^_`{}~[-");
_rule_name = '<' >> (alpha >> *(alnum | char_('-'))) >> '>';

BOOST_SPIRIT_DEBUG_NODES(
(_rule)(_expression)(_list)(_term)
(_literal)(_character)
(_rule_name))
}

private:
using Skipper = qi::rule<Iterator>;
Skipper _skipper, _blank;

qi::rule<Iterator, Ast::Syntax()> start;
qi::rule<Iterator, Ast::Rule(), Skipper> _rule;
qi::rule<Iterator, Ast::Expression(), Skipper> _expression;
qi::rule<Iterator, Ast::List(), Skipper> _list;
// lexemes
qi::rule<Iterator, Ast::Term()> _term;
qi::rule<Iterator, Ast::Name()> _rule_name;
qi::rule<Iterator, std::string()> _literal;
qi::rule<Iterator, char()> _character;
};
}

int main() {
Parser::BNF<std::string::const_iterator> const parser;

std::string const input = R"(<code> ::= <letter><digit> | <letter><digit><code>
<letter> ::= "a" | "b" | "c" | "d" | "e"
| "f" | "g" | "h" | "i"
<digit> ::= "0" | "1" | "2" | "3" |
"4"
)";

auto it = input.begin(), itEnd = input.end();

Ast::Syntax syntax;
if (parse(it, itEnd, parser, syntax)) {
for (auto& rule : syntax)
fmt::print("{} ::= {}\n", rule.name, fmt::join(rule.rhs, " | "));
} else {
std::cout << "Failed\n";
}

if (it != itEnd)
std::cout << "Remaining: " << std::quoted(std::string(it, itEnd)) << "\n";
}
还有 Live On Coliru (没有 libfmt)

关于c++ - 使用boost精神qi处理gor解析bnf语法的多行规则,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/62967856/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com