gpt4 book ai didi

c++ - 使用 istream 迭代器时解析失败

转载 作者:塔克拉玛干 更新时间:2023-11-03 07:18:10 24 4
gpt4 key购买 nike

我正在使用 boost::spirit 来解析 csv 输入(请不要建议替代方案,这只是测试)。当我将 stdin 的内容读入一个字符串并对其进行迭代时,解析成功;但是,当 std::cin 的内容直接读取(通过我自己编写的包装器,因为 phrase_parse 需要一个继承自 std::iterator<std::forward_iterator_tag, T> 的迭代器,而 std::istream_iterator<T> 不这样做),解析失败,我不知道为什么,因为调试输出似乎表明在两种情况下都解析了相同的文本,但结果不同。

我什至尝试遍历 std::cin并将其放入字符串中,并正确解析;我不明白为什么提供的迭代器类型会影响结果。这是我正在处理的示例(抱歉,它太大了,但您可以将其插入并轻松编译)。尝试定义宏 SECTION_STRINGSTREAM (成功)或 SECTION_CIN (失败)观察奇怪的行为(默认行为(成功)是当 std::cin 被读取为字符串时)。

如果您使用 echo "\"f\",111,222,333,\"ref_type\",\"spc\",\"type\",\"lan\",\"name\",\"scop\"" | ./spirit_csv 编译并运行它,调试输出清楚地显示正在解析整个字符串。我还添加了 if (++start == end) std::cerr << "woah";这在所有情况下都会被触发,所以看起来它肯定是在解析到输入的末尾。

// following example from:
// http://www.boost.org/doc/libs/1_58_0/libs/spirit/example/qi/employee.cpp, and
// num_list4.cpp, and others

#define BOOST_SPIRIT_DEBUG 1
#define BOOST_SPIRIT_DEBUG_PRINT_SOME 200
#define BOOST_SPIRIT_DEBUG_OUT std::cerr

// std includes
#include <iostream>
#include <string>
// boost includes
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_stl.hpp>

namespace frontend {
namespace spirit = boost::spirit;
namespace qi = spirit::qi;
namespace ascii = spirit::ascii;

struct cursor {
std::string file;
unsigned long long offset;
unsigned long long line;
unsigned long long col;
// verify inputs using enum
// decl/ref/defn/call
std::string reference_type;
// variable/function/scope/label/type
std::string specifier;
// if variable/function, then type
std::string type;
std::string language;
std::string name;
std::string scope;
};
}

// adapt struct to boost fusion
BOOST_FUSION_ADAPT_STRUCT(frontend::cursor, (std::string, file),
(unsigned long long, offset),
(unsigned long long, line), (unsigned long long, col),
(std::string, reference_type),
(std::string, specifier), (std::string, type),
(std::string, language), (std::string, name),
(std::string, scope));

// note: blank_type is so that newlines aren't counted as skippable, because
// they are significant for csv! however, typically you'll be wanting to use
// boost::spirit::ascii::space as your whitespace operator if you really do not
// care about whitespace
namespace frontend {
template <typename Iterator>
struct cursor_parser
: public qi::grammar<Iterator, std::vector<cursor>(), qi::blank_type> {
qi::rule<Iterator, std::string(), qi::blank_type> quoted_string;
qi::rule<Iterator, cursor(), qi::blank_type> start;
qi::rule<Iterator, std::vector<cursor>(), qi::blank_type> vec;

cursor_parser() : cursor_parser::base_type(vec) {
using qi::uint_;
using qi::eol;
using qi::lexeme;
using qi::_1;
using ascii::char_;
using boost::phoenix::push_back;
using boost::phoenix::ref;
using boost::spirit::_val;

quoted_string %= lexeme['"' >> *(char_ - '"') >> '"'];

start %=
// file
quoted_string >> ',' >>
// offset
uint_ >> ',' >>
// line
uint_ >> ',' >>
// col
uint_ >> ',' >>
// reference_type
quoted_string >> ',' >>
// specifier
quoted_string >> ',' >>
// type
quoted_string >> ',' >>
// language
quoted_string >> ',' >>
// name
quoted_string >> ',' >>
// scope
quoted_string;

vec %= start % eol;

quoted_string.name("qs");
debug(quoted_string);
start.name("s");
debug(start);
vec.name("v");
debug(vec);
}
};

template <typename T>
class cin_forward_iterator : std::iterator<std::forward_iterator_tag, T> {
private:
std::istream_iterator<T> i;

public:
cin_forward_iterator() : i(std::istream_iterator<T>()) {}
cin_forward_iterator(std::istream &in) : i(std::istream_iterator<T>(in)) {}
const T &operator*() const { return *i; }
cin_forward_iterator<T> operator++() {
++i;
return *this;
};
cin_forward_iterator<T> operator++(int) {
cin_forward_iterator<T> tmp = *this;
i++;
return tmp;
};
bool operator==(const cin_forward_iterator<T> &rhs) const {
return i == rhs.i;
}
bool operator!=(const cin_forward_iterator<T> &rhs) const {
return not(*this == rhs);
}
};
}

namespace std {
template <typename T> class iterator_traits<frontend::cin_forward_iterator<T>> {
public:
typedef typename std::istream_iterator<T>::value_type value_type;
typedef typename std::istream_iterator<T>::difference_type difference_type;
typedef typename std::istream_iterator<T>::reference reference;
typedef typename std::istream_iterator<T>::pointer pointer;
typedef std::forward_iterator_tag iterator_category;
};
}

/* try:
echo \
"\"f\",111,222,333,\"ref_type\",\"spc\",\"type\",\"lan\",\"name\",\"scop\"" \
| ./spirit_csv
*/
int main() {
std::vector<frontend::cursor> v;
// succeeds
#ifdef SECTION_STRINGSTREAM
std::stringstream ss;
ss << std::cin.rdbuf();
std::string s(ss.str());
auto start = s.cbegin();
auto end = s.cend();
// fails
#elif SECTION_CIN
noskipws(std::cin);
frontend::cin_forward_iterator<char> start(std::cin);
frontend::cin_forward_iterator<char> end;
// succeeds
#else
noskipws(std::cin);
frontend::cin_forward_iterator<char> start_in(std::cin);
frontend::cin_forward_iterator<char> end_in;
std::string s;
for (; start_in != end_in; ++start_in) {
s += *start_in;
}
auto start = s.begin();
auto end = s.end();
#endif
if (phrase_parse(start, end,
#ifdef SECTION_STRINGSTREAM
frontend::cursor_parser<std::string::const_iterator>(),
#elif SECTION_CIN
frontend::cursor_parser<
frontend::cin_forward_iterator<char>>(),
#else
frontend::cursor_parser<std::string::iterator>(),
#endif
boost::spirit::qi::blank, v)) {
for (auto &c : v) {
std::cout << boost::fusion::as_vector(c) << std::endl;
}
std::cerr << "success!" << std::endl;
return 0;
} else {
std::cerr << "failure!" << std::endl;
return 1;
}
}

最佳答案

为什么要有自己的迭代器?

很难做到正确,而且看起来您显然没有让它具有多 channel 感知能力。

输入迭代器与前向迭代器的类别不同是有原因的!只是掩盖它没有帮助。前向迭代器必须是可复制的,并且在取消引用时具有可重复的值。输入迭代器不满足这些条件。

In fact you should either just use boost::spirit::istream_iterator or you could compose an iterator using Spirit's multi_pass adaptor:

这是一个修复和清理过的版本:

Live On Coliru

#define BOOST_SPIRIT_DEBUG 1
#define BOOST_SPIRIT_DEBUG_PRINT_SOME 200
#define BOOST_SPIRIT_DEBUG_OUT std::cerr

// std includes
#include <iostream>
#include <string>
// boost includes
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/adapted.hpp>
#include <boost/fusion/include/as_vector.hpp>

namespace frontend {
namespace qi = boost::spirit::qi;

struct cursor {
std::string file;
unsigned long long offset;
unsigned long long line;
unsigned long long col;
// verify inputs using enum
// decl/ref/defn/call
std::string reference_type;
// variable/function/scope/label/type
std::string specifier;
// if variable/function, then type
std::string type;
std::string language;
std::string name;
std::string scope;
};
}

// adapt struct to boost fusion
BOOST_FUSION_ADAPT_STRUCT(frontend::cursor,
(std::string, file)
(unsigned long long, offset)
(unsigned long long, line)
(unsigned long long, col)
(std::string, reference_type)
(std::string, specifier)
(std::string, type)
(std::string, language)
(std::string, name)
(std::string, scope))

namespace frontend {

// NOTE: blank_type doesn't skip newlines
template <typename Iterator>
struct cursor_parser : public qi::grammar<Iterator, std::vector<cursor>(), qi::blank_type> {

cursor_parser() : cursor_parser::base_type(vec) {
using qi::uint_;
using qi::eol;
using qi::lexeme;
using qi::char_;

quoted_string %= lexeme['"' >> *(char_ - '"') >> '"'];

start %=
quoted_string >> ',' >> // file
uint_ >> ',' >> // offset
uint_ >> ',' >> // line
uint_ >> ',' >> // col
quoted_string >> ',' >> // reference_type
quoted_string >> ',' >> // specifier
quoted_string >> ',' >> // type
quoted_string >> ',' >> // language
quoted_string >> ',' >> // name
quoted_string; // scope

vec %= start % eol;

BOOST_SPIRIT_DEBUG_NODES((quoted_string)(start)(vec))
}

private:
qi::rule<Iterator, std::string() , qi::blank_type> quoted_string;
qi::rule<Iterator, cursor() , qi::blank_type> start;
qi::rule<Iterator, std::vector<cursor>(), qi::blank_type> vec;
};
}

int main() {
// '"f",111,222,333,"ref_type","spc","type","lan","name","scop"'
using It = boost::spirit::istream_iterator;

It start_in(std::cin >> std::noskipws), end_in;
std::vector<frontend::cursor> v;

if (phrase_parse(start_in, end_in, frontend::cursor_parser<It>(), frontend::qi::blank, v)) {
for (auto &c : v) {
std::cout << boost::fusion::as_vector(c) << std::endl;
}
std::cerr << "success!" << std::endl;
} else {
std::cerr << "failure!" << std::endl;
return 1;
}
}

输出

(f 111 222 333 ref_type spc type lan name scop)
success!

调试输出:

<vec>
<try>"f",111,222,333,"ref_type","spc","type","lan","name","scop"\n</try>
<start>
<try>"f",111,222,333,"ref_type","spc","type","lan","name","scop"\n</try>
<quoted_string>
<try>"f",111,222,333,"ref_type","spc","type","lan","name","scop"\n</try>
<success>,111,222,333,"ref_type","spc","type","lan","name","scop"\n</success>
<attributes>[[f]]</attributes>
</quoted_string>
<quoted_string>
<try>"ref_type","spc","type","lan","name","scop"\n</try>
<success>,"spc","type","lan","name","scop"\n</success>
<attributes>[[r, e, f, _, t, y, p, e]]</attributes>
</quoted_string>
<quoted_string>
<try>"spc","type","lan","name","scop"\n</try>
<success>,"type","lan","name","scop"\n</success>
<attributes>[[s, p, c]]</attributes>
</quoted_string>
<quoted_string>
<try>"type","lan","name","scop"\n</try>
<success>,"lan","name","scop"\n</success>
<attributes>[[t, y, p, e]]</attributes>
</quoted_string>
<quoted_string>
<try>"lan","name","scop"\n</try>
<success>,"name","scop"\n</success>
<attributes>[[l, a, n]]</attributes>
</quoted_string>
<quoted_string>
<try>"name","scop"\n</try>
<success>,"scop"\n</success>
<attributes>[[n, a, m, e]]</attributes>
</quoted_string>
<quoted_string>
<try>"scop"\n</try>
<success>\n</success>
<attributes>[[s, c, o, p]]</attributes>
</quoted_string>
<success>\n</success>
<attributes>[[[f], 111, 222, 333, [r, e, f, _, t, y, p, e], [s, p, c], [t, y, p, e], [l, a, n], [n, a, m, e], [s, c, o, p]]]</attributes>
</start>
<start>
<try></try>
<quoted_string>
<try></try>
<fail/>
</quoted_string>
<fail/>
</start>
<success>\n</success>
<attributes>[[[[f], 111, 222, 333, [r, e, f, _, t, y, p, e], [s, p, c], [t, y, p, e], [l, a, n], [n, a, m, e], [s, c, o, p]]]]</attributes>
</vec>

注意事项:

  • 您在 BOOST_FUSION_ADAPT_STRUCT 宏调用中出错(逗号太多)

关于c++ - 使用 istream 迭代器时解析失败,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/30454331/

24 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com