- iOS/Objective-C 元类和类别
- objective-c - -1001 错误,当 NSURLSession 通过 httpproxy 和/etc/hosts
- java - 使用网络类获取 url 地址
- ios - 推送通知中不播放声音
我为自定义文本文件格式创建了一个 Qi 解析器。有数以万计的条目要处理,每个条目通常有 1-10 个子条目。我放了一个精简的解析器工作示例 here .
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_fusion.hpp>
#include <boost/spirit/include/phoenix_stl.hpp>
#include <boost/spirit/include/phoenix_object.hpp>
#include <boost/spirit/include/support_istream_iterator.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
#include <fstream>
#include <iostream>
#include <string>
using std::string;
using std::vector;
using std::cout;
using std::endl;
// Data model filled by the clustering-file parser.
namespace model
{
    namespace qi = boost::spirit::qi;

    // One "SPEC" line of a cluster. The members are adapted as a Fusion
    // sequence in this same order, so reordering them changes what the
    // parser stores where.
    struct spectrum
    {
        std::string comment;
        std::string file;
        std::string nativeId;
        // Numeric members are value-initialized so a default-constructed
        // spectrum never carries indeterminate values.
        double precursorMz = 0.0;
        int precursorCharge = 0;
        double precursorIntensity = 0.0;
    };

    // One "=Cluster=" section: its id line plus all spectra listed under it.
    struct cluster
    {
        std::string id;
        std::vector<spectrum> spectra;
    };

    // A whole clustering file: the "name=" header plus every cluster.
    struct clustering
    {
        std::string name;
        std::vector<cluster> clusters;
    };
}
// Tell fusion about the data structures to make them first-class fusion citizens.
// Must be at global scope.
// Each entry is a (type, member) pair, listed in the same order as the members
// are declared in the struct. The unqualified `string` relies on the
// `using std::string;` declaration earlier in this file.

// model::spectrum: three text fields followed by three numeric fields.
BOOST_FUSION_ADAPT_STRUCT(
model::spectrum,
(string, comment)
(string, file)
(string, nativeId)
(double, precursorMz)
(int, precursorCharge)
(double, precursorIntensity)
)
// model::cluster: the cluster id and the spectra that belong to it.
BOOST_FUSION_ADAPT_STRUCT(
model::cluster,
(string, id)
(std::vector<model::spectrum>, spectra)
)
// model::clustering: the clustering name and all of its clusters.
BOOST_FUSION_ADAPT_STRUCT(
model::clustering,
(string, name)
(std::vector<model::cluster>, clusters)
)
namespace {
    // Function object that prints the input line on which a parse error
    // occurred. It is wrapped in boost::phoenix::function below so it can be
    // invoked lazily (e.g. from a qi::on_error handler).
    struct ReportError
    {
        // Nested result metafunction: every invocation yields void.
        // NOTE(review): presumably the result-type protocol expected by
        // boost::phoenix::function - confirm against the Boost version in use.
        template<typename, typename, typename, typename> struct result { typedef void type; };

        // Writes "Error parsing in <what>", then the text of the line that
        // contains error_iter, then an empty line, to std::cerr.
        //   first_iter / last_iter - bounds of the input being parsed
        //   error_iter             - position at which parsing failed
        //   what                   - description of the failing parser
        // A caret marking the error column is not printed.
        template<typename Iter>
        void operator()(Iter first_iter, Iter last_iter,
                        Iter error_iter, const boost::spirit::qi::info& what) const
        {
            const std::string before(first_iter, error_iter);
            const std::string after(error_iter, last_iter);

            // Trim both halves to the new-line characters surrounding the error.
            const std::string::size_type line_begin = before.rfind('\n');
            const std::string::size_type line_end = after.find('\n');

            std::string error_line;
            if (line_begin == std::string::npos)
                error_line = before;
            else
                error_line = before.substr(line_begin + 1);
            error_line += after.substr(0, line_end);

            std::cerr << "Error parsing in " << what << std::endl;
            std::cerr << error_line << std::endl;
            std::cerr << std::endl;
        }
    };

    const boost::phoenix::function<ReportError> report_error = ReportError();
}
namespace model
{
    // Qi grammar for a complete clustering file. Blanks (spaces/tabs) are
    // skipped; line ends are significant and matched explicitly with eol.
    // The synthesized attribute is a model::clustering holding every cluster.
    template <typename Iterator>
    struct cluster_parser : qi::grammar<Iterator, clustering(), qi::blank_type>
    {
        cluster_parser() : cluster_parser::base_type(clusters)
        {
            using qi::int_;
            using qi::lit;
            using qi::double_;
            using qi::bool_;
            using qi::lexeme;
            using qi::omit;
            using qi::eol;
            using qi::ascii::char_;
            // Only needed by the disabled error handlers further down.
            using qi::on_error;
            using qi::fail;
            using namespace qi::labels;

            // "..." with at least one character inside; lexeme[] turns the
            // skipper off so blanks within the quotes are kept.
            quoted_string %= lexeme['"' > +(char_ - '"') > '"'];

            // SPEC #<comment> File:"<file>", NativeID:"<id>" <bool> <mz> <charge> <intensity>
            // model::spectrum has no member for the boolean flag, so it is
            // parsed but omitted; exposing it would give the sequence one more
            // attribute than the struct has fields and shift the numeric
            // values out of their intended members.
            spectrum_start %=
                lit("SPEC") >
                "#" > +(char_ - "File:") >
                "File:" > quoted_string > lit(",") >
                "NativeID:" > quoted_string >
                omit[bool_] > double_ > int_ > double_;

            // "=Cluster=" line, "id=<id>" line, then one spectrum per line.
            cluster_start %=
                "=Cluster=" > eol >
                "id=" > +(char_ - eol) > eol >
                spectrum_start % eol;

            // "name=<name>" line, an empty line, then the clusters.
            clusters %=
                "name=" > +(char_ - eol) > eol >
                eol >
                cluster_start % eol;

            BOOST_SPIRIT_DEBUG_NODES((clusters)(cluster_start)(quoted_string)(spectrum_start))

            // Error reporting is currently disabled:
            //on_error<fail>(clusters, report_error(_1, _2, _3, _4));
            //on_error<fail>(cluster_start, report_error(_1, _2, _3, _4));
            //on_error<fail>(spectrum_start, report_error(_1, _2, _3, _4));
            //on_error<fail>(quoted_string, report_error(_1, _2, _3, _4));
            // on_success(cluster_start, quantify_cluster(_1, _2, _3, _4)); ??
        }

        qi::rule<Iterator, std::string(), qi::blank_type> quoted_string;
        qi::rule<Iterator, cluster(), qi::blank_type> cluster_start;
        qi::rule<Iterator, spectrum(), qi::blank_type> spectrum_start;
        qi::rule<Iterator, clustering(), qi::blank_type> clusters;
    };
}
int main()
{
using namespace model;
cluster_parser<boost::spirit::istream_iterator> g; // Our grammar
string str;
//std::ifstream input("c:/test/Mo_tai.clustering");
std::istringstream input("name=GreedyClustering_0.99\n"
"\n"
"=Cluster=\n"
"id=9c8c5830-5841-4f77-b819-64180509615b\n"
"SPEC\t#file=w:\\test\\Mo_Tai_iTRAQ_f4.mgf#id=index=219#title=Mo_Tai_iTRAQ_f4.1254.1254.2 File:\"Mo_Tai_iTRAQ_f4.raw\", NativeID:\"controllerType=0 controllerNumber=1 scan=1254\"\ttrue\t\t300.1374\t2\t\t\t0.0\n"
"=Cluster=\n"
"id=f8f384a1-3d5f-4af1-9581-4d03a5aa3342\n"
"SPEC\t#file=w:\\test\\Mo_Tai_iTRAQ_f9.mgf#id=index=560#title=Mo_Tai_iTRAQ_f9.1666.1666.3 File:\"Mo_Tai_iTRAQ_f9.raw\", NativeID:\"controllerType=0 controllerNumber=1 scan=1666\"\ttrue\t\t300.14413\t3\t\t\t0.0\n"
"SPEC\t#file=w:\\test\\Mo_Tai_iTRAQ_f9.mgf#id=index=520#title=Mo_Tai_iTRAQ_f9.1621.1621.3 File:\"Mo_Tai_iTRAQ_f9.raw\", NativeID:\"controllerType=0 controllerNumber=1 scan=1621\"\ttrue\t\t300.14197\t3\t\t\t0.0\n"
"=Cluster=\n"
"id=b84b79e1-44bc-44c0-a9af-5391ca02582d\n"
"SPEC\t#file=w:\\test\\Mo_Tai_iTRAQ_f2.mgf#id=index=7171#title=Mo_Tai_iTRAQ_f2.12729.12729.2 File:\"Mo_Tai_iTRAQ_f2.raw\", NativeID:\"controllerType=0 controllerNumber=1 scan=12729\"\ttrue\t\t300.15695\t2\t\t\t0.0");
input.unsetf(std::ios::skipws);
boost::spirit::istream_iterator begin(input);
boost::spirit::istream_iterator end;
clustering clusteringResults;
bool r = phrase_parse(begin, end, g, qi::blank, clusteringResults);
if (r && begin == end)
{
cout << "Parsing succeeded (" << clusteringResults.clusters.size() << " clusters)\n";
/*for (size_t i = 0; i < std::min((size_t)10, clusteringResults.clusters.size()); ++i)
{
cluster& c = clusteringResults.clusters[i];
cout << "Cluster " << c.id << " - avg. precursor m/z: " << c.avgPrecursorMz << ", num. spectra: " << c.spectra.size() << endl;
}*/
return 1;
}
else
{
std::cout << "Parsing failed (" << clusteringResults.clusters.size() << " clusters)\n";
if (!clusteringResults.clusters.empty())
{
cluster& c = clusteringResults.clusters.back();
cout << "Last cluster parsed " << c.id << ", num. spectra: " << c.spectra.size() << endl;
}
return 1;
}
}
我不想在处理之前将整个文件解析到内存中。如何让它在每个簇解析完后把该条目(簇)放入队列等待处理,处理完后删除该簇,然后继续解析?更好的做法是让另一个线程异步地进行处理。
最佳答案
只需使用流式迭代器。
或者对内存映射文件进行操作。
在处理端,从语义动作(semantic action)内部把待处理的任务推送到队列中。
Note: you could run into a supposed bug that doesn't clear the backtrack buffers properly; You might want to check this and take preventative measures as described in this answer: Boost spirit memory leak using
flush_multi_pass
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/include/io.hpp>
// Data model handed to the cluster handler by the streaming parser.
namespace model
{
    namespace qi = boost::spirit::qi;
    namespace px = boost::phoenix;

    // One "SPEC" line of a cluster. The members are adapted as a Fusion
    // sequence in this same order, so reordering them changes what the
    // parser stores where.
    struct spectrum {
        std::string comment;
        std::string file;
        std::string nativeId;
        // Numeric members are value-initialized so a default-constructed
        // spectrum never carries indeterminate values.
        double precursorMz = 0.0;
        int precursorCharge = 0;
        double precursorIntensity = 0.0;
    };

    // One "=Cluster=" section: its id line plus all spectra listed under it.
    struct cluster {
        std::string id;
        std::vector<spectrum> spectra;
    };
}
// Adapt the model structs as Fusion sequences so the Qi rules can fill them
// member by member. Only member names are given here, listed in declaration
// order; these invocations sit at global scope, outside namespace model.
BOOST_FUSION_ADAPT_STRUCT(model::spectrum, comment, file, nativeId, precursorMz, precursorCharge, precursorIntensity)
BOOST_FUSION_ADAPT_STRUCT(model::cluster, id, spectra)
namespace model
{
    // Streaming variant of the clustering grammar: instead of building one big
    // result object it invokes `handler(name, cluster)` from a semantic action
    // each time a cluster has been parsed. The grammar exposes no attribute
    // and no skipper; blank skipping is installed internally by `start`.
    template <typename Iterator>
    struct cluster_parser : qi::grammar<Iterator>
    {
        // handler: called once per parsed cluster with the clustering name
        //          (from the "name=" header) and the cluster itself.
        cluster_parser(std::function<void(std::string const&, model::cluster const&)> handler)
            : cluster_parser::base_type(start),
              submit_(handler)
        {
            using namespace qi;

            // "..." with at least one character inside; lexeme[] turns the
            // skipper off so blanks within the quotes are kept.
            quoted_string %= lexeme['"' > +(char_ - '"') > '"'];

            // SPEC #<comment> File:"<file>", NativeID:"<id>" <bool> <mz> <charge> <intensity>
            // model::spectrum has no member for the boolean flag, so it is
            // parsed but omitted; exposing it would give the sequence one more
            // attribute than the struct has fields and shift the numeric
            // values out of their intended members.
            spectrum_start %=
                lit("SPEC") >
                "#" > +(char_ - "File:") >
                "File:" > quoted_string > lit(",") >
                "NativeID:" > quoted_string >
                omit[bool_] > double_ > int_ > double_;

            // "=Cluster=" line, "id=<id>" line, then one spectrum per line.
            cluster_start %=
                "=Cluster=" > eol >
                "id=" > +(char_ - eol) > eol >
                spectrum_start % eol;

            // The clustering name is stored in the rule's local variable
            // (name_) and passed to the handler together with every cluster.
            // `clusters` declares no attribute, so parsed clusters are handed
            // off to the handler rather than collected by this rule.
            clusters %=
                "name=" > qi::as_string[ +(char_ - eol) ][ name_ = _1 ] > eol > eol >
                cluster_start [ submit_(name_, _1) ] % eol;

            // Entry point: run `clusters` with a blank skipper.
            start = skip(blank) [clusters];

            BOOST_SPIRIT_DEBUG_NODES((start)(clusters)(cluster_start)(quoted_string)(spectrum_start))
        }
      private:
        qi::_a_type name_; // placeholder for the std::string local of `clusters`
        // Lazy (Phoenix) wrapper around the user-supplied handler.
        px::function<std::function<void(std::string const&, model::cluster const&)> > submit_;
        qi::rule<Iterator, std::string(), qi::blank_type> quoted_string;
        qi::rule<Iterator, cluster(), qi::blank_type> cluster_start;
        qi::rule<Iterator, spectrum(), qi::blank_type> spectrum_start;
        qi::rule<Iterator, qi::locals<std::string>, qi::blank_type> clusters;
        qi::rule<Iterator> start;
    };
}
int main()
{
using namespace model;
cluster_parser<boost::spirit::istream_iterator> g([&](auto const&...){std::cout << "handled\n";}); // Our grammar
std::string str;
//std::ifstream input("c:/test/Mo_tai.clustering");
std::istringstream input(R"(name=GreedyClustering_0.99
=Cluster=
id=9c8c5830-5841-4f77-b819-64180509615b
SPEC #file=w:\test\Mo_Tai_iTRAQ_f4.mgf#id=index=219#title=Mo_Tai_iTRAQ_f4.1254.1254.2 File:"Mo_Tai_iTRAQ_f4.raw", NativeID:"controllerType=0 controllerNumber=1 scan=1254" true 300.1374 2 0.0
=Cluster=
id=f8f384a1-3d5f-4af1-9581-4d03a5aa3342
SPEC #file=w:\test\Mo_Tai_iTRAQ_f9.mgf#id=index=560#title=Mo_Tai_iTRAQ_f9.1666.1666.3 File:"Mo_Tai_iTRAQ_f9.raw", NativeID:"controllerType=0 controllerNumber=1 scan=1666" true 300.14413 3 0.0
SPEC #file=w:\test\Mo_Tai_iTRAQ_f9.mgf#id=index=520#title=Mo_Tai_iTRAQ_f9.1621.1621.3 File:"Mo_Tai_iTRAQ_f9.raw", NativeID:"controllerType=0 controllerNumber=1 scan=1621" true 300.14197 3 0.0
=Cluster=
id=b84b79e1-44bc-44c0-a9af-5391ca02582d
SPEC #file=w:\test\Mo_Tai_iTRAQ_f2.mgf#id=index=7171#title=Mo_Tai_iTRAQ_f2.12729.12729.2 File:"Mo_Tai_iTRAQ_f2.raw", NativeID:"controllerType=0 controllerNumber=1 scan=12729" true 300.15695 2 0.0)");
input.unsetf(std::ios::skipws);
boost::spirit::istream_iterator begin(input);
boost::spirit::istream_iterator end;
bool r = phrase_parse(begin, end, g, qi::blank);
if (r && begin == end) {
std::cout << "Parsing succeeded\n";
}
else {
std::cout << "Parsing failed\n";
}
if (begin!=end) {
std::cout << "Unparsed remaining input: '" << std::string(begin, end) << "\n";
}
return (r && begin==end)? 0 : 1;
}
打印
handled
handled
handled
Parsing succeeded
这是一个把各个簇(cluster)调度到线程池上进行异步处理的版本。
Note that the submit method posts a lambda to the service. The lambda captures by value because the lifetime of the parameters should extend during the processing.
#include <boost/asio.hpp>
#include <boost/thread.hpp>
namespace ba = boost::asio;
// Small thread pool built on a boost::asio::io_service. Clusters handed to
// submit() are processed on the pool's worker threads; the destructor blocks
// until the workers have returned from io_service::run().
struct Processing {
    // Starts one worker per hardware thread, and always at least one.
    Processing() {
        // hardware_concurrency() may report 0 when the value is unknown; with
        // zero workers every posted job would be dropped without ever running.
        unsigned workers = boost::thread::hardware_concurrency();
        if (workers == 0)
            workers = 1;
        for (unsigned i = 0; i < workers; ++i)
            _threads.create_thread([this] { _svc.run(); });
    }

    // Releases the work guard so run() can return once the queued jobs are
    // finished, then joins every worker.
    ~Processing() {
        _work.reset();
        _threads.join_all();
    }

    // Queues one cluster for processing. `name` and `cluster` are copied into
    // the posted job because the job may run after the caller's objects are
    // gone.
    void submit(std::string const& name, model::cluster const& cluster) {
        _svc.post([this, name, cluster] { do_processing(name, cluster); });
    }

  private:
    // Runs on a worker thread: reports the cluster, then sleeps as a stand-in
    // for real work.
    void do_processing(std::string const& name, model::cluster const& cluster) {
        std::cout << "Thread " << boost::this_thread::get_id() << ": " << name << " cluster of " << cluster.spectra.size() << " spectra\n";
        boost::this_thread::sleep_for(boost::chrono::milliseconds(950));
    }

    // Declaration order matters: _work is initialized from _svc.
    ba::io_service _svc;
    boost::optional<ba::io_service::work> _work = ba::io_service::work(_svc);
    boost::thread_group _threads;
};
[...snip...] 以及 main 函数中的相应部分:
// Worker pool that receives every parsed cluster.
Processing processing;
// Adapter handed to the grammar: forwards its arguments unchanged to
// Processing::submit. `processing` is captured by reference, so it has to
// stay alive for as long as the grammar may call the handler.
auto handler = [&processing](auto const&... args) {
    processing.submit(args...);
};
cluster_parser<boost::spirit::istream_iterator> g(handler); // Our grammar
其余未修改,现在打印(例如):
Thread 7f0144a5b700: GreedyClustering_0.99 cluster of 1 spectra
Thread 7f014425a700: GreedyClustering_0.99 cluster of 2 spectra
Parsing succeeded
Thread 7f0143a59700: GreedyClustering_0.99 cluster of 1 spectra
关于c++ - 如何使用 Boost.Spirit.Qi 增量解析(并作用于)大文件?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/41748596/
我正在将一个手写解析器迁移到 Boost.Spirit (2.5.4)。第一印象是积极的,但由于我使用的是 C++17,X3 似乎是一个非常有吸引力的选择。 幸运的是,有很多关于 X3 的可用资源:
是否可以使用 boost::spirit::qi 来解析以下内容? A_B --> (A, B) A_B_C --> (A_B, C) A_B_C_D --> (A_B_
我正在尝试解析一种类似 lisp 的语言,它具有一些通用功能的语法糖。例如,plus 函数可以写成 (+ 1 2) 或 1 + 2。我认为在尝试解释语言之前消除句法糖会显着促进解释过程,因为那样的话,
我正在尝试解析一种类似 lisp 的语言,它具有一些通用功能的语法糖。例如,plus 函数可以写成 (+ 1 2) 或 1 + 2。我认为在尝试解释语言之前消除句法糖会显着促进解释过程,因为那样的话,
我想使用解析后的值作为循环解析器的输入。 语法定义了一个 header ,它指定了以下字符串的(可变)大小。例如,假设以下字符串是某个解析器的输入。 12\r\nTest Payload 解析器应提取
我正在编写 DSL 并使用 Boost Spirit 词法分析器来标记我的输入。在我的语法中,我想要一个类似于此的规则(其中 tok 是词法分析器): header_block = tok.n
我有以下精神语法。我正在尝试在 struct myresult 中创建 AST 节点的向量使用标准 push_back(at_c(qi::_val), qi::_1)但出现编译错误(见下文)。 typ
需要为 std::pair 对象提供类型为 boost::variant 的对象的值。您将如何使用其他资源来实现这个想法?下面还有其他方法吗? struct aggr_pair_visitor
我有一个词法分析器,基于该词法分析器,我现在想创建一个使用该词法分析器生成的标记的语法。我尝试改编我发现的一些示例,现在我有一些可以编译和工作的东西至少有一点,但我的一个应该失败的测试却没有。现在我想
当我使用此 qi 语法从 Lex 接受标记时: pair %= token(ID_MARKER) >> ':' >> atom >> ',' >> atom
如何解析可能包含 double 或 int 的字符串,具体取决于是否设置了点。例如。 6.0是double类型,6是int类型。规则是 rule,skipper> r = qi::double_|qi
请帮助我诊断以下错误。我有一个简单的语法: struct json_start_elem_grammar_object : qi::grammar { json_start_elem_gramma
作为使用 Boost.Spirit 的更大语法的第一阶段,我尝试解析“true”和“false”以生成相应的 bool 值,true 和 false. 我正在使用 Spirit.Lex 对输入进行标记
我正在尝试解析一个也可以包含标识符的表达式并将每个元素推送到 std::vector 中,我想出了以下语法: #include #include #include #include name
我正在为 if 函数实现生产规则: qi::rule f_if; f_if = qi::ascii::string("if") >> qi::char_('(')
我编写了这段代码示例并期望它打印OPERATION( OPERATOR(aaa) ID(bbb) ) 但我只得到OPERATION ( OPERATOR(aaa) )反而。 result2 和 it1
我的数据定义为: std::string data("START34*23*43**"); 我的语法: "START" >> boost::spirit::hex % '*' 题: 如何解析有两颗星的
我编写了这段代码示例并期望它打印OPERATION( OPERATOR(aaa) ID(bbb) ) 但我只得到OPERATION ( OPERATOR(aaa) )反而。 result2 和 it1
我需要解析一个键值对,其中键本身是示例中的固定字符串lke'cmd'。不幸的是qi::lit没有综合属性,并且qi::char_没有解析固定的字符串。 以下代码无法编译。执行后,我需要那个result
我正在尝试编写精神规则,但我无法弄清楚这个新规则的属性是什么。 以下代码按我预期的方式工作。 #include #include #include #include #include nam
我是一名优秀的程序员,十分优秀!