- c - 在位数组中找到第一个零
- linux - Unix 显示有关匹配两种模式之一的文件的信息
- 正则表达式替换多个文件
- linux - 隐藏来自 xtrace 的命令
我想使用 boost::spirit 来从原始公式中提取由多种元素组成的化合物的化学计量。在给定的化合物中,我的解析器应该能够区分三种化学元素模式:
然后使用这些模式来解析以下化合物:
显然,化学元素模式可以是任何顺序(例如 CH[1]4 和 H[1]4C ...)和频率。
我编写了我的解析器,它非常接近于完成这项工作,但我仍然面临一个问题。
这是我的代码:
// Boost.Spirit Qi grammar whose synthesized attribute is an isotopesMixture.
// The grammar declares two locals: _a (isotopesMixture) and _b (double).
template <typename Iterator>
struct ChemicalFormulaParser : qi::grammar<Iterator,isotopesMixture(),qi::locals<isotopesMixture,double>>
{
// Fills the two symbol tables from the isotope database and defines every rule.
ChemicalFormulaParser(): ChemicalFormulaParser::base_type(_start)
{
namespace phx = boost::phoenix;
// Semantic action for handling the case of pure isotope
phx::function<PureIsotopeBuilder> const build_pure_isotope = PureIsotopeBuilder();
// Semantic action for handling the case of pure isotope mixture
phx::function<IsotopesMixtureBuilder> const build_isotopes_mixture = IsotopesMixtureBuilder();
// Semantic action for handling the case of natural element
phx::function<NaturalElementBuilder> const build_natural_element = NaturalElementBuilder();
// Semantic action called in _start with (_val, parsed element, trailing number)
phx::function<UpdateElement> const update_element = UpdateElement();
// XML database that stores all the isotopes of the periodic table
ChemicalDatabaseManager<Isotope>* imgr=ChemicalDatabaseManager<Isotope>::Instance();
const auto& isotopeDatabase=imgr->getDatabase();
// Register every isotope name (e.g. H[1], C[14]) and every element symbol (e.g. H, C)
// in the Spirit symbol tables; each entry maps the text to itself.
for (const auto& isotope : isotopeDatabase) {
_isotopeNames.add(isotope.second.getName(),isotope.second.getName());
_elementSymbols.add(isotope.second.getProperty<std::string>("symbol"),isotope.second.getProperty<std::string>("symbol"));
}
// "{" followed by one or more `isotopeName(number)` entries, then "}".
_mixtureToken = "{" >> +(_isotopeNames >> "(" >> qi::double_ >> ")") >> "}";
// Element symbol (stored in _a) followed by a mixture block (stored in _b).
// The builder's boolean result is assigned to _pass, so a false result makes the rule fail.
_isotopesMixtureToken = (_elementSymbols[qi::_a=qi::_1] >> _mixtureToken[qi::_b=qi::_1])[qi::_pass=build_isotopes_mixture(qi::_val,qi::_a,qi::_b)];
// Isotope name on its own; the rule fails when the builder returns false.
_pureIsotopeToken = (_isotopeNames[qi::_a=qi::_1])[qi::_pass=build_pure_isotope(qi::_val,qi::_a)];
// Element symbol on its own; the rule fails when the builder returns false.
_naturalElementToken = (_elementSymbols[qi::_a=qi::_1])[qi::_pass=build_natural_element(qi::_val,qi::_a)];
// One or more element patterns, each optionally followed by a number (1.0 when absent).
// The alternatives are tried left to right: mixture, pure isotope, natural element.
// When an earlier alternative fails (including via _pass=false) the next one is attempted.
_start = +( ( (_isotopesMixtureToken | _pureIsotopeToken | _naturalElementToken)[qi::_a=qi::_1] >>
(qi::double_|qi::attr(1.0))[qi::_b=qi::_1])[qi::_pass=update_element(qi::_val,qi::_a,qi::_b)] );
}
//! Symbol tables: isotope names and element symbols, filled in the constructor
qi::symbols<char,std::string> _isotopeNames;
qi::symbols<char,std::string> _elementSymbols;
//! Rules; those declared with qi::locals use them as _a / _b in the constructor
qi::rule<Iterator,isotopesMixture()> _mixtureToken;
qi::rule<Iterator,isotopesMixture(),qi::locals<std::string,isotopesMixture>> _isotopesMixtureToken;
qi::rule<Iterator,isotopesMixture(),qi::locals<std::string>> _pureIsotopeToken;
qi::rule<Iterator,isotopesMixture(),qi::locals<std::string>> _naturalElementToken;
qi::rule<Iterator,isotopesMixture(),qi::locals<isotopesMixture,double>> _start;
};
基本上,每个单独的元素模式都可以通过它们各自的语义 Action 进行正确解析,这些语义 Action 会在构建化合物的同位素与其相应的化学计量之间生成映射作为输出。解析以下化合物时问题开始:
CH{H[1](0.9)H[2](0.4)}
在这种情况下,语义操作 build_isotopes_mixture 返回 false,因为比率之和 0.9+0.4 没有意义。因此,我期望并希望解析器在遇到该化合物时失败。然而,由于 _start 规则对三种化学元素模式使用了替代运算符,解析器仍然设法解析了它:1) 丢弃 {H[1](0.9)H[2](0.4)} 部分;2) 保留前面的 H;3) 使用 _naturalElementToken 解析它。是我的语法表达得不够清晰吗?如何使用替代运算符,使得某个备选项已经匹配、但其语义操作返回 false 时,解析器就此停止?
最佳答案
How to use the alternative operator in such a way that, when an occurrence has been found but gave a false when running the semantic action, the parser stops ?
通常,您可以通过添加 expectation point 来实现此目的以防止回溯。
在这种情况下,您实际上是在“合并”几个任务:
Spirit 擅长匹配输入,并且在解释输入方面(主要是指创建 AST)也提供了很好的工具。然而,动态验证会让事情变得“糟糕”。
我经常重复的一个建议是,尽可能考虑分离关注点。我会考虑
这为您提供了最具表现力的代码,同时保持其高度可维护性。
因为我对问题领域的理解不够透彻,而且代码示例还不够完整,无法归纳出它,所以我不会尝试给出我的想法的完整示例。相反,我会尽最大努力勾勒出我在一开始提到的期望点方法。
这花费了最多的时间。(请考虑替将要帮助您的人做好这些基础工作)
#include <boost/fusion/adapted/std_pair.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <map>
namespace qi = boost::spirit::qi;
// Placeholder semantic-action functor: accepts any number of arguments of any
// type, ignores them all, and always reports success.
struct DummyBuilder {
    typedef bool result_type;

    template <typename... Args>
    result_type operator()(Args&&...) const {
        return true;
    }
};
// Placeholder semantic actions used by the grammar: each one adds nothing of its
// own and inherits DummyBuilder's call operator, which always returns true.
struct PureIsotopeBuilder : DummyBuilder { };
struct IsotopesMixtureBuilder : DummyBuilder { };
struct NaturalElementBuilder : DummyBuilder { };
struct UpdateElement : DummyBuilder { };
// Minimal stand-in for an isotope record: it carries a name and an element
// symbol, and exposes the symbol through a string-keyed property lookup.
struct Isotope {
    // Both arguments are optional; the defaults are "unnamed" and "?".
    Isotope(std::string const& name = "unnamed", std::string const& symbol = "?")
        : _name(name)
        , _symbol(symbol)
    {
    }

    // Returns a copy of the isotope's name.
    std::string getName() const { return _name; }

    // Returns the value of the property called `name`. Only "symbol" is known;
    // any other name raises std::domain_error. The template parameter T is not
    // used by this implementation.
    template <typename T>
    std::string getProperty(std::string const& name) const {
        if (name != "symbol") {
            throw std::domain_error("no such property (" + name + ")");
        }
        return _symbol;
    }

private:
    std::string _name;
    std::string _symbol;
};
// One mixture entry: an isotope paired with a double value.
using MixComponent = std::pair<Isotope, double>;
// Attribute type of the grammar and its rules: a list of mixture entries.
using isotopesMixture = std::list<MixComponent>;
// Mock database manager: a per-type singleton that owns a small, hard-coded
// table mapping an integer key to an Isotope-like record.
template <typename Isotope>
struct ChemicalDatabaseManager {
    // Returns the address of the single shared instance, created on first use.
    static ChemicalDatabaseManager* Instance() {
        static ChemicalDatabaseManager theInstance;
        return &theInstance;
    }

    // Gives mutable access to the underlying table.
    std::map<int, Isotope>& getDatabase() {
        return _db;
    }

private:
    // Sample content: each record is built from { name, symbol }.
    std::map<int, Isotope> _db {
        { 1, { "H[1]", "H" } },
        { 2, { "H[2]", "H" } },
        { 3, { "Carbon", "C" } },
        { 4, { "U[235]", "U" } },
    };
};
// Boost.Spirit Qi grammar whose synthesized attribute is an isotopesMixture.
// The grammar declares two locals: _a (isotopesMixture) and _b (double).
template <typename Iterator>
struct ChemicalFormulaParser : qi::grammar<Iterator, isotopesMixture(), qi::locals<isotopesMixture, double> >
{
// Fills the two symbol tables from the isotope database and defines every rule.
ChemicalFormulaParser(): ChemicalFormulaParser::base_type(_start)
{
// Brings the Qi placeholders and parsers (_a, _b, _1, _val, _pass, double_, attr) into scope.
using namespace qi;
namespace phx = boost::phoenix;
phx::function<PureIsotopeBuilder> build_pure_isotope; // Semantic action for handling the case of pure isotope
phx::function<IsotopesMixtureBuilder> build_isotopes_mixture; // Semantic action for handling the case of pure isotope mixture
phx::function<NaturalElementBuilder> build_natural_element; // Semantic action for handling the case of natural element
// Semantic action called in _start with (_val, parsed element, trailing number)
phx::function<UpdateElement> update_element;
// Singleton database of isotopes
ChemicalDatabaseManager<Isotope>* imgr = ChemicalDatabaseManager<Isotope>::Instance();
const auto& isotopeDatabase=imgr->getDatabase();
// Register every isotope name (e.g. H[1], C[14]) and every element symbol (e.g. H, C)
// in the Spirit symbol tables; each entry maps the text to itself.
for (const auto& isotope : isotopeDatabase) {
_isotopeNames.add(isotope.second.getName(),isotope.second.getName());
_elementSymbols.add(isotope.second.template getProperty<std::string>("symbol"),isotope.second.template getProperty<std::string>("symbol"));
}
// "{" followed by one or more `isotopeName(number)` entries, then "}".
_mixtureToken = "{" >> +(_isotopeNames >> "(" >> double_ >> ")") >> "}";
// Element symbol (stored in _a) followed by a mixture block (stored in _b).
// The builder's boolean result is assigned to _pass, so a false result makes the rule fail.
_isotopesMixtureToken = (_elementSymbols[_a=_1] >> _mixtureToken[_b=_1])[_pass=build_isotopes_mixture(_val,_a,_b)];
// Isotope name on its own; the rule fails when the builder returns false.
_pureIsotopeToken = (_isotopeNames[_a=_1])[_pass=build_pure_isotope(_val,_a)];
// Element symbol on its own; the rule fails when the builder returns false.
_naturalElementToken = (_elementSymbols[_a=_1])[_pass=build_natural_element(_val,_a)];
// One or more element patterns, each optionally followed by a number (1.0 when absent).
// The alternatives are tried left to right: mixture, pure isotope, natural element.
_start = +( ( (_isotopesMixtureToken | _pureIsotopeToken | _naturalElementToken)[_a=_1] >>
(double_|attr(1.0))[_b=_1]) [_pass=update_element(_val,_a,_b)] );
}
private:
//! Symbol tables: isotope names and element symbols, filled in the constructor
qi::symbols<char, std::string> _isotopeNames;
qi::symbols<char, std::string> _elementSymbols;
//! Rules; those declared with qi::locals use them as _a / _b in the constructor
qi::rule<Iterator, isotopesMixture()> _mixtureToken;
qi::rule<Iterator, isotopesMixture(), qi::locals<std::string, isotopesMixture> > _isotopesMixtureToken;
qi::rule<Iterator, isotopesMixture(), qi::locals<std::string> > _pureIsotopeToken;
qi::rule<Iterator, isotopesMixture(), qi::locals<std::string> > _naturalElementToken;
qi::rule<Iterator, isotopesMixture(), qi::locals<isotopesMixture, double> > _start;
};
// Runs the grammar over a fixed set of sample formulas. For each one it prints
// a banner, whether the parse succeeded, and any input left unconsumed.
int main() {
    using It = std::string::const_iterator;
    ChemicalFormulaParser<It> parser;

    for (std::string const input : {
            "C", // --> natural carbon made of C[12] and C[13] in natural abundance
            "CH4", // --> methane made of natural carbon and hydrogen
            "C2H{H[1](0.8)H[2](0.2)}6", // --> ethane made of natural C and non-natural H made of 80% of hydrogen and 20% of deuterium
            "C2H{H[1](0.9)H[2](0.2)}6", // --> invalid mixture (total is 110%?)
            "U[235]", // --> pure uranium 235
        })
    {
        std::cout << " ============= '" << input << "' ===========\n";

        It cursor = input.begin();
        It const end = input.end();
        isotopesMixture mixture;

        bool const parsed = qi::parse(cursor, end, parser, mixture);
        std::cout << (parsed ? "Parsed successfully\n" : "Parse failure\n");

        // qi::parse advances `cursor`; anything between it and `end` was not consumed.
        if (cursor != end) {
            std::cout << "Remaining input unparsed: '" << std::string(cursor, end) << "'\n";
        }
    }
}
照原样运行时,它只会打印:
============= 'C' ===========
Parsed successfully
============= 'CH4' ===========
Parsed successfully
============= 'C2H{H[1](0.8)H[2](0.2)}6' ===========
Parsed successfully
============= 'C2H{H[1](0.9)H[2](0.2)}6' ===========
Parsed successfully
============= 'U[235]' ===========
Parsed successfully
不需要局部变量(qi::locals),只需使用常规占位符:
// Same rules written without qi::locals: the semantic actions read the parsed
// attributes directly through the positional placeholders _1 and _2.
// "{" followed by one or more `isotopeName(number)` entries, then "}".
_mixtureToken = "{" >> +(_isotopeNames >> "(" >> double_ >> ")") >> "}";
// _1 is the element symbol, _2 the mixture; a false builder result fails the rule via _pass.
_isotopesMixtureToken = (_elementSymbols >> _mixtureToken) [ _pass=build_isotopes_mixture(_val, _1, _2) ];
_pureIsotopeToken = _isotopeNames [ _pass=build_pure_isotope(_val, _1) ];
_naturalElementToken = _elementSymbols [ _pass=build_natural_element(_val, _1) ];
// One or more element patterns, each optionally followed by a number (1.0 when absent).
_start = +(
( (_isotopesMixtureToken | _pureIsotopeToken | _naturalElementToken) >>
(double_|attr(1.0)) ) [ _pass=update_element(_val, _1, _2) ]
);
// ....
// Matching rule declarations: no rule carries qi::locals any more.
qi::rule<Iterator, isotopesMixture()> _mixtureToken;
qi::rule<Iterator, isotopesMixture()> _isotopesMixtureToken;
qi::rule<Iterator, isotopesMixture()> _pureIsotopeToken;
qi::rule<Iterator, isotopesMixture()> _naturalElementToken;
qi::rule<Iterator, isotopesMixture()> _start;
您需要处理名称/符号之间的冲突(可能只是通过优先考虑其中一个)
符合标准的编译器将需要 template 限定符(除非我完全误解了您的数据结构,那样的话我就不知道 ChemicalDatabaseManager 的模板参数应该是什么意思了)。
Hint, MSVC is not a standards-conforming compiler
假设“权重”需要在 _mixtureToken 规则内加起来达到 100%,我们可以让 build_isotopes_mixture 不再是占位实现(dummy),并添加验证:
// Semantic-action functor for the "element symbol + mixture" pattern.
// It does not build anything yet: it only checks that the values in `mixture`
// add up to 1.0 (within 0.00001) and returns the outcome.
struct IsotopesMixtureBuilder {
    // Advertises the return type for boost::result_of / Boost.Phoenix, as
    // DummyBuilder does. The DummyBuilder-derived placeholder this struct
    // replaces exposed it, so configurations that rely on the nested
    // result_type keep working.
    using result_type = bool;

    // @param mixture  parsed (isotope, value) entries; only the values are read.
    // The output and element-symbol arguments are accepted but unused.
    // @return true when the values sum to 1.0 within the tolerance.
    bool operator()(isotopesMixture& /*output*/, std::string const& /*elementSymbol*/, isotopesMixture const& mixture) const {
        using namespace boost::adaptors;
        // validate weights total only
        return std::abs(1.0 - boost::accumulate(mixture | map_values, 0.0)) < 0.00001;
    }
};
但是,正如您所注意到的,回溯会使这种验证失去作用。相反,您可以“断言”任何完整的混合物总和必须为 100%:
// After the closing "}" an expectation point (operator >) requires eps(...) to match.
// eps matches only when validate_weight_total(_val) is true; otherwise the
// expectation fails with qi::expectation_failure instead of backtracking.
_mixtureToken = "{" >> +(_isotopeNames >> "(" >> double_ >> ")") >> "}" > eps(validate_weight_total(_val));
其中 validate_weight_total 可以用类似下面的函数对象来实现:
// Predicate functor that checks whether the values of a mixture add up to 1.0
// (within 0.00001).
struct ValidateWeightTotal {
    // Advertises the return type for boost::result_of / Boost.Phoenix, as
    // DummyBuilder does.
    using result_type = bool;

    // Exception type for callers that prefer a dedicated error over a plain
    // `false`; operator() below does not throw it.
    struct InconsistentsWeights : virtual std::runtime_error {
        InconsistentsWeights() : std::runtime_error("InconsistentsWeights") {}
    };

    // @param mixture  parsed (isotope, value) entries; only the values are read.
    // @return true when the values sum to 1.0 within the tolerance.
    bool operator()(isotopesMixture const& mixture) const {
        using namespace boost::adaptors;
        bool const ok = std::abs(1.0 - boost::accumulate(mixture | map_values, 0.0)) < 0.00001;
        // Alternative policy (previously left as unreachable code after the return):
        //     return ok ? ok : throw InconsistentsWeights{};
        return ok;
    }
};
#include <boost/fusion/adapted/std_pair.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/range/adaptors.hpp>
#include <boost/range/numeric.hpp>
#include <map>
namespace qi = boost::spirit::qi;
// Placeholder semantic-action functor: accepts any number of arguments of any
// type, ignores them all, and always reports success.
struct DummyBuilder {
    typedef bool result_type;

    template <typename... Args>
    result_type operator()(Args&&...) const {
        return true;
    }
};
// Placeholder semantic actions used by the grammar: each one adds nothing of its
// own and inherits DummyBuilder's call operator, which always returns true.
struct PureIsotopeBuilder : DummyBuilder { };
struct NaturalElementBuilder : DummyBuilder { };
struct UpdateElement : DummyBuilder { };
// Minimal stand-in for an isotope record: it carries a name and an element
// symbol, and exposes the symbol through a string-keyed property lookup.
struct Isotope {
    // Both arguments are optional; the defaults are "unnamed" and "?".
    Isotope(std::string const& name = "unnamed", std::string const& symbol = "?")
        : _name(name)
        , _symbol(symbol)
    {
    }

    // Returns a copy of the isotope's name.
    std::string getName() const { return _name; }

    // Returns the value of the property called `name`. Only "symbol" is known;
    // any other name raises std::domain_error. The template parameter T is not
    // used by this implementation.
    template <typename T>
    std::string getProperty(std::string const& name) const {
        if (name != "symbol") {
            throw std::domain_error("no such property (" + name + ")");
        }
        return _symbol;
    }

private:
    std::string _name;
    std::string _symbol;
};
// One mixture entry: an isotope paired with a double value (the weight that
// ValidateWeightTotal and IsotopesMixtureBuilder sum up).
using MixComponent = std::pair<Isotope, double>;
// Attribute type of the grammar and its rules: a list of mixture entries.
using isotopesMixture = std::list<MixComponent>;
// Semantic-action functor for the "element symbol + mixture" pattern.
// It does not build anything yet: it only checks that the values in `mixture`
// add up to 1.0 (within 0.00001) and returns the outcome.
struct IsotopesMixtureBuilder {
    // Advertises the return type for boost::result_of / Boost.Phoenix, as
    // DummyBuilder does, so configurations that rely on the nested result_type
    // keep working.
    using result_type = bool;

    // @param mixture  parsed (isotope, value) entries; only the values are read.
    // The output and element-symbol arguments are accepted but unused.
    // @return true when the values sum to 1.0 within the tolerance.
    bool operator()(isotopesMixture& /*output*/, std::string const& /*elementSymbol*/, isotopesMixture const& mixture) const {
        using namespace boost::adaptors;
        // validate weights total only
        return std::abs(1.0 - boost::accumulate(mixture | map_values, 0.0)) < 0.00001;
    }
};
// Predicate functor that checks whether the values of a mixture add up to 1.0
// (within 0.00001).
struct ValidateWeightTotal {
    // Advertises the return type for boost::result_of / Boost.Phoenix, as
    // DummyBuilder does.
    using result_type = bool;

    // Exception type for callers that prefer a dedicated error over a plain
    // `false`; operator() below does not throw it.
    struct InconsistentsWeights : virtual std::runtime_error {
        InconsistentsWeights() : std::runtime_error("InconsistentsWeights") {}
    };

    // @param mixture  parsed (isotope, value) entries; only the values are read.
    // @return true when the values sum to 1.0 within the tolerance.
    bool operator()(isotopesMixture const& mixture) const {
        using namespace boost::adaptors;
        bool const ok = std::abs(1.0 - boost::accumulate(mixture | map_values, 0.0)) < 0.00001;
        // Alternative policy (previously left as unreachable code after the return):
        //     return ok ? ok : throw InconsistentsWeights{};
        return ok;
    }
};
// Mock database manager: a per-type singleton that owns a small, hard-coded
// table mapping an integer key to an Isotope-like record.
template <typename Isotope>
struct ChemicalDatabaseManager {
    // Returns the address of the single shared instance, created on first use.
    static ChemicalDatabaseManager* Instance() {
        static ChemicalDatabaseManager theInstance;
        return &theInstance;
    }

    // Gives mutable access to the underlying table.
    std::map<int, Isotope>& getDatabase() {
        return _db;
    }

private:
    // Sample content: each record is built from { name, symbol }.
    std::map<int, Isotope> _db {
        { 1, { "H[1]", "H" } },
        { 2, { "H[2]", "H" } },
        { 3, { "Carbon", "C" } },
        { 4, { "U[235]", "U" } },
    };
};
// Boost.Spirit Qi grammar whose synthesized attribute is an isotopesMixture.
// This version uses no qi::locals; semantic actions read attributes via _1 / _2.
template <typename Iterator>
struct ChemicalFormulaParser : qi::grammar<Iterator, isotopesMixture()>
{
// Fills the two symbol tables from the isotope database and defines every rule.
ChemicalFormulaParser(): ChemicalFormulaParser::base_type(_start)
{
// Brings the Qi placeholders and parsers (_1, _2, _val, _pass, double_, attr, eps) into scope.
using namespace qi;
namespace phx = boost::phoenix;
phx::function<PureIsotopeBuilder> build_pure_isotope; // Semantic action for handling the case of pure isotope
phx::function<IsotopesMixtureBuilder> build_isotopes_mixture; // Semantic action for handling the case of pure isotope mixture
phx::function<NaturalElementBuilder> build_natural_element; // Semantic action for handling the case of natural element
// Semantic action called in _start with (_val, parsed element, trailing number)
phx::function<UpdateElement> update_element;
// Lazy predicate used by _mixtureToken to check the mixture's total weight
phx::function<ValidateWeightTotal> validate_weight_total;
// Singleton database of isotopes
ChemicalDatabaseManager<Isotope>* imgr = ChemicalDatabaseManager<Isotope>::Instance();
const auto& isotopeDatabase=imgr->getDatabase();
// Register every isotope name (e.g. H[1], C[14]) and every element symbol (e.g. H, C)
// in the Spirit symbol tables; each entry maps the text to itself.
for (const auto& isotope : isotopeDatabase) {
_isotopeNames.add(isotope.second.getName(),isotope.second.getName());
_elementSymbols.add(isotope.second.template getProperty<std::string>("symbol"), isotope.second.template getProperty<std::string>("symbol"));
}
// "{" + one or more `isotopeName(number)` entries + "}", then an expectation point:
// eps matches only when validate_weight_total(_val) is true; otherwise the
// expectation fails with qi::expectation_failure instead of backtracking.
_mixtureToken = "{" >> +(_isotopeNames >> "(" >> double_ >> ")") >> "}" > eps(validate_weight_total(_val));
// _1 is the element symbol, _2 the mixture; a false builder result fails the rule via _pass.
_isotopesMixtureToken = (_elementSymbols >> _mixtureToken) [ _pass=build_isotopes_mixture(_val, _1, _2) ];
// Isotope name on its own; the rule fails when the builder returns false.
_pureIsotopeToken = _isotopeNames [ _pass=build_pure_isotope(_val, _1) ];
// Element symbol on its own; the rule fails when the builder returns false.
_naturalElementToken = _elementSymbols [ _pass=build_natural_element(_val, _1) ];
// One or more element patterns, each optionally followed by a number (1.0 when absent).
// The alternatives are tried left to right: mixture, pure isotope, natural element.
_start = +(
( (_isotopesMixtureToken | _pureIsotopeToken | _naturalElementToken) >>
(double_|attr(1.0)) ) [ _pass=update_element(_val, _1, _2) ]
);
}
private:
//! Symbol tables: isotope names and element symbols, filled in the constructor
qi::symbols<char, std::string> _isotopeNames;
qi::symbols<char, std::string> _elementSymbols;
//! Rules, all exposing an isotopesMixture attribute
qi::rule<Iterator, isotopesMixture()> _mixtureToken;
qi::rule<Iterator, isotopesMixture()> _isotopesMixtureToken;
qi::rule<Iterator, isotopesMixture()> _pureIsotopeToken;
qi::rule<Iterator, isotopesMixture()> _naturalElementToken;
qi::rule<Iterator, isotopesMixture()> _start;
};
// Runs the grammar over a fixed set of sample formulas. For each one it prints
// a banner, whether the parse succeeded, and any input left unconsumed. An
// exception escaping the parse of one sample is reported and the loop moves on
// to the next sample.
int main() {
    using It = std::string::const_iterator;
    ChemicalFormulaParser<It> parser;

    for (std::string const input : {
            "C", // --> natural carbon made of C[12] and C[13] in natural abundance
            "CH4", // --> methane made of natural carbon and hydrogen
            "C2H{H[1](0.8)H[2](0.2)}6", // --> ethane made of natural C and non-natural H made of 80% of hydrogen and 20% of deuterium
            "C2H{H[1](0.9)H[2](0.2)}6", // --> invalid mixture (total is 110%?)
            "U[235]", // --> pure uranium 235
        })
    {
        try {
            std::cout << " ============= '" << input << "' ===========\n";

            It cursor = input.begin();
            It const end = input.end();
            isotopesMixture mixture;

            bool const parsed = qi::parse(cursor, end, parser, mixture);
            std::cout << (parsed ? "Parsed successfully\n" : "Parse failure\n");

            // qi::parse advances `cursor`; anything between it and `end` was not consumed.
            if (cursor != end) {
                std::cout << "Remaining input unparsed: '" << std::string(cursor, end) << "'\n";
            }
        } catch (std::exception const& e) {
            std::cout << "Caught exception '" << e.what() << "'\n";
        }
    }
}
打印
============= 'C' ===========
Parsed successfully
============= 'CH4' ===========
Parsed successfully
============= 'C2H{H[1](0.8)H[2](0.2)}6' ===========
Parsed successfully
============= 'C2H{H[1](0.9)H[2](0.2)}6' ===========
Caught exception 'boost::spirit::qi::expectation_failure'
============= 'U[235]' ===========
Parsed successfully
关于c++ - 用元素的混合物解析化学式,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/42937368/
好吧,这让我发疯了。我在 spoj 上解决了一个名为 MIXTURES ( http://www.spoj.com/problems/MIXTURES/) 的问题。我不知道为什么我总是遇到段错误。该问
我正在使用 scikit learn 0.15.0。当我尝试导入 sklearn.mixture 时,我得到ImportError: 无法导入名称选择 有什么想法吗? ================
我是一名优秀的程序员,十分优秀!