gpt4 book ai didi

c++ - 使用用户定义的过滤器/运行时表达式评估来过滤对象

转载 作者:行者123 更新时间:2023-11-28 00:12:17 26 4
gpt4 key购买 nike

我想实现一个根据用户定义的条件(如下所述)过滤对象的系统,但老实说我不知道该从哪里入手。如果有现成的库,那就太好了;如果没有,能给我指出一个正确的方向也很好。

我有很多对象,我们称它们为 Cars,它们具有品牌、型号等属性。我希望能够让用户用一个字符串来描述过滤器,例如 "car.make == "honda" && car.year == "2012"" 等等。

然后,在我的应用程序运行期间,我希望能够像这样进行检查:if (filter(carobj) == true) { ... }。请注意,我要找的东西与列表推导式不同,因为我并不是要过滤一个列表,而是要判断某个对象是否满足一组条件。

我认识到这可能有两个组成部分,一个是解析用户的输入,另一个是构建这样一个对象。我有一种感觉,那里有一些相当不错的表达式树解析器可以完成前者的工作,但在后者上我完全迷路了。

过滤器需要快速,因为它将对数百万个对象进行操作,而且我也不能有 boost 依赖项。

最佳答案

下面给出解决这个问题的一种思路——代码只经过了非常粗略的测试,无疑还隐藏着一些错误。如代码所示,它只处理 std::string 和 int 两种类型的字段。它把过滤分成两步:先把表达式解析成一个标记(token)vector,然后反复使用这些标记来测试传给 operator() 的记录。因此它没有经过高度优化,但也不应该非常慢。代码里有一些刻意的简化,例如可以比较 "field == 'abc'",但不能比较 "'abc' == field"。此外还有很多工作可以做:验证表达式、在解析或求值失败时提供更多关于出错位置的信息,等等。我保留了调试输出,因为接手这段代码的人很可能会需要它,既可以用来理解其工作原理,也可以用来调试和改进它。

#include <cctype>
#include <cstddef>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

// Debug trace: writes ":<source line> <MSG>" followed by a newline to std::cerr.
// MSG may be a chain of stream insertions, e.g. DBG("x=" << x << ", y=" << y).
// Wrapped in do/while(false) so the macro behaves like a single statement.
#define DBG(MSG)                                   \
    do {                                           \
        std::cerr << ':' << __LINE__ << ' ';       \
        std::cerr << MSG << '\n';                  \
    } while (false)

// Precondition check: when WHAT evaluates to false, formats THROW_MSG (which
// may be a chain of stream insertions) and throws it as a std::runtime_error.
// Does nothing when WHAT is true. Wrapped in do/while(false) so the macro
// behaves like a single statement.
#define NEED(WHAT, THROW_MSG)                          \
    do {                                               \
        if (!(WHAT)) {                                 \
            std::ostringstream oss;                    \
            oss << THROW_MSG;                          \
            throw std::runtime_error(oss.str());       \
        }                                              \
    } while (false)

/// Interface a record type implements so that it can be tested by a filter.
///
/// Fields are addressed by integer ids obtained from their textual names.
struct Queryable
{
    /// Virtual so that implementations can be destroyed safely through a
    /// pointer or reference to Queryable.
    virtual ~Queryable() = default;

    /// Returns the id of the field called `field`.
    virtual int get_field_id(const std::string& field) const = 0;

    /// Loads the value of the field identified by `id` into one of the two
    /// output parameters: the string for string-valued fields, the int for
    /// integer-valued fields. The other output parameter need not be touched.
    virtual void load_field(int id, std::string&, int&) const = 0;
};

class Evaluator
{
public:
// lexs expression, optionally proactively verifying field identifiers against *pq
Evaluator(const std::string& expression, const Queryable* pq = nullptr)
{
std::istringstream iss(expression);
char c;
int unmatched_paren = 0;
while (iss >> c)
{
switch (c)
{
case '(': tokens_.emplace_back(LParen); ++unmatched_paren; break;
case ')': tokens_.emplace_back(RParen); --unmatched_paren; break;
case '-': case '0'...'9':
{
iss.unget();
int i;
iss >> i;
tokens_.emplace_back(i);
break;
}
case '\'':
tokens_.emplace_back(StringLit);
iss >> std::noskipws;
while (iss >> c)
if (c == '\'') goto post_lit;
else tokens_.back().s_ += c;
throw std::runtime_error("unterminated string literal");
post_lit:
iss >> std::skipws;
break;
case '&':
NEED(iss.get() == '&', "missing second '&' that'd form AND operator");
tokens_.emplace_back(And);
break;
case '|':
NEED(iss.get() == '|', "missing second '&' that'd form AND operator");
tokens_.emplace_back(Or);
break;
case '<':
if (iss.peek() == '=') { iss.ignore(); tokens_.emplace_back(LE); }
else tokens_.emplace_back(L);
break;
case '>':
if (iss.peek() == '=') { iss.ignore(); tokens_.emplace_back(GE); }
else tokens_.emplace_back(G);
break;
case '!':
if (iss.peek() == '=') { iss.ignore(); tokens_.emplace_back(NE); }
else tokens_.emplace_back(Not);
break;
case '=':
if (iss.peek() == '=') iss.ignore(); // allow = and ==
tokens_.emplace_back(E);
break;
default:
NEED(std::isalpha(c), "can't parse content in expression at "
<< iss.tellg() << " in '" << iss.str() << "', problem text '"
<< iss.str().substr(iss.tellg(), 20) << "'...");
tokens_.emplace_back(Idn);
tokens_.back().s_ += c;
iss >> std::noskipws;
while (iss >> c)
if (!std::isalnum(c)) { iss.unget(); goto post_idn; }
else tokens_.back().s_ += c;
post_idn:
tokens_.back().i_ = pq ? pq->get_field_id(tokens_.back().s_) : 0;
iss >> std::skipws;
}
}
NEED(!unmatched_paren, "unbalanced paren in expression");
DBG("tokens parsed: " << tokens_);
}

bool operator()(const Queryable& q) const
{
size_t token_pos = 0;
return eval(q, token_pos);
}

private:
bool eval(const Queryable& q, size_t& token_pos) const
{
bool so_far = true;
bool hanging_not = false;
std::string s;
int i;
for ( ; token_pos < tokens_.size(); ++token_pos)
{
const Token& t = tokens_[token_pos];
switch (t.type_)
{
case Idn:
{
int id = t.i_ ? t.i_ : q.get_field_id(t.s_);
q.load_field(id, s, i);
DBG("loaded field " << id << ':' << t.s_ << ", s '" << s << "', i " << i);
const Token& op = tokens_.at(++token_pos);
const Token& rhs = tokens_.at(++token_pos);
switch(op.type_)
{
case L: so_far = id > 0 ? s < rhs.s_ : i < rhs.i_; break;
case LE: so_far = id > 0 ? s <= rhs.s_ : i <= rhs.i_; break;
case E: so_far = id > 0 ? s == rhs.s_ : i == rhs.i_; break;
case GE: so_far = id > 0 ? s >= rhs.s_ : i >= rhs.i_; break;
case G: so_far = id > 0 ? s > rhs.s_ : i > rhs.i_; break;
case NE: so_far = id > 0 ? s != rhs.s_ : i != rhs.i_; break;
default:
NEED(false, "identifier followed by " << op
<< " but only an operator is supported");
}
DBG(" " << op << ' ' << rhs << " -> " << so_far);
break;
}
case And:
case Or:
if (so_far == (t.type_ == Or)) // false && ... true || ...
{
int depth = 0;
while (token_pos < tokens_.size() && depth >= 0)
if (tokens_[++token_pos].type_ == LParen) ++depth;
else if (tokens_[token_pos].type_ == RParen) --depth;
return so_far;
}
break;

case Not: hanging_not = true; break;

case LParen:
so_far = hanging_not ^ eval(q, ++token_pos);
hanging_not = false;
DBG("post LParen so_far " << so_far << ", token_pos " << token_pos);
break;

case RParen: return so_far;

default:
throw std::runtime_error("unexpect token");
}
}
return so_far;
}

enum Type { Idn, StringLit, IntLit, LParen, RParen, Not, And, Or, L, LE, E, GE, G, NE };
struct Token
{
Type type_; std::string s_; int i_;
Token(Type type) : type_(type) { }
Token(int i) : type_(IntLit), i_(i) { }
Token(Type type, const std::string& s) : type_(type), s_(s) { }
Token(Type type, const std::string&& s) : type_(type), s_(s) { }
};
std::vector<Token> tokens_;

friend std::ostream& operator<<(std::ostream& os, Type t)
{
switch (t)
{
case Idn: return os << "Idn";
case StringLit: return os << "StringLit";
case IntLit: return os << "IntLit";
case LParen: return os << "LParen";
case RParen: return os << "RParen";
case Not: return os << "Not";
case And: return os << "And";
case Or: return os << "Or";
case L: return os << 'L';
case LE: return os << "LE";
case E: return os << 'E';
case GE: return os << "GE";
case G: return os << 'G';
case NE: return os << "NE";
default: throw std::runtime_error("invalid Token type");
}
}

friend std::ostream& operator<<(std::ostream& os, const Token& t)
{
os << t.type_;
if (t.type_ == Idn || t.type_ == StringLit) return os << ":'" << t.s_ << '\'';
if (t.type_ == IntLit) return os << ':' << t.i_;
return os;
}

friend std::ostream& operator<<(std::ostream& os, const std::vector<Token>& v)
{
os << '{';
size_t pos = 0;
for (const auto& t : v) os << ' ' << pos++ << ':' << t;
return os << " }";
}
};

示例用法:

/// Example record type: a car with two string fields and one integer field.
struct Car : Queryable
{
    // Positive ids mark string-valued fields, negative ids mark integer-valued
    // fields; 0 is reserved.
    enum Fields { Make = 1, Model, Year = -1 };

    Car(const std::string& make, const std::string& model, int year)
        : make_(make), model_(model), year_(year)
    {
    }

    /// Maps a field name ("make", "model" or "year") to its id; throws
    /// std::runtime_error for any other name.
    int get_field_id(const std::string& field) const override
    {
        struct Entry { const char* name; Fields id; };
        static const Entry known_fields[] = {
            { "make", Make }, { "model", Model }, { "year", Year }
        };
        for (const Entry& entry : known_fields)
            if (field == entry.name)
                return static_cast<int>(entry.id);
        throw std::runtime_error("attempt to lookup a field that doesn't exist");
    }

    /// Copies the field identified by `id` into `s` (Make, Model) or into `i`
    /// (Year), leaving the other output untouched; throws std::runtime_error
    /// for an unknown id.
    void load_field(int id, std::string& s, int& i) const override
    {
        if (id == Make)
            s = make_;
        else if (id == Model)
            s = model_;
        else if (id == Year)
            i = year_;
        else
            throw std::runtime_error("attempt to retrieve a field using unknown field id");
    }

    std::string make_, model_;
    int year_;
};

// Minimal non-fatal test assertions. ASSERT_OP evaluates X and Y once each
// and, when `X OP Y` does not hold, writes "FAIL <X> <OP> <Y> at :<line>" to
// std::cerr; execution continues either way.
#define ASSERT_OP(X, OP, Y)                                             \
    do {                                                                \
        const auto& x = (X);                                            \
        const auto& y = (Y);                                            \
        if (!(x OP y))                                                  \
            std::cerr << "FAIL " << #X " " #OP " " #Y << " at :"        \
                      << __LINE__ << '\n';                              \
    } while (false)

// Convenience forms: equality, "is true" and "is false".
#define ASSERT_EQ(X, Y) ASSERT_OP(X, ==, Y)
#define ASSERT(X)       ASSERT_OP(X, ==, true)
#define ASSERT_NOT(X)   ASSERT_OP(X, ==, false)

// Exercises Evaluator against Car records: first with field names resolved
// while each record is tested, then with field ids resolved once at
// construction time.
int main()
{
    const std::string filter_text = "make == 'Honda' && (year == 1999 || year > 2005)";

    // No Queryable supplied: field names are looked up on every evaluation.
    Evaluator e(filter_text);
    ASSERT(e(Car { "Honda", "Fit", 2008 }));
    ASSERT_NOT(e(Car { "Nissan", "GT-R", 2011 }));
    ASSERT(e(Car { "Honda", "NSX", 1999 }));

    // Field ids can instead be looked up while the expression is lexed, which
    // makes operator() faster; the Evaluator then cannot be used against other
    // types that share the field names but use different field ids.
    Car car { "Honda", "Civic", 2012 };
    Evaluator e2(filter_text, &car);
    ASSERT(e2(car));
    ASSERT(e2(Car { "Honda", "Fit", 2008 }));
    ASSERT_NOT(e2(Car { "Nissan", "GT-R", 2011 }));
    ASSERT(e2(Car { "Honda", "NSX", 1999 }));

    return 0;
}

可以在 coliru.stacked-crooked.com 上在线查看这段代码的运行结果。

FWIW,任何对这个问题空间感兴趣但确实有可用的 boost 的读者可能更喜欢使用 boost spirit 和/或使用 boost::variant 来处理不同的类型。

关于c++ - 使用用户定义的过滤器/运行时表达式评估来过滤对象,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/32388693/

26 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com