gpt4 book ai didi

c++ - 用boost spirit 完全解码http header value

转载 作者:太空狗 更新时间:2023-10-29 20:22:28 25 4
gpt4 key购买 nike

我又一次想求助于 boost spirit,也又一次被它打败了。

HTTP header 采用一般形式:

text/html; q=1.0, text/*; q=0.8, image/gif; q=0.6, image/jpeg; q=0.6, image/*; q=0.5, */*; q=0.1

value *OWS [; *OWS name *OWS [= *OWS possibly_quoted_value] *OWS [...]] *OWS [ , <another value> ...]

所以在我看来,这个 header 解码为:

value[0]: 
text/html
params:
name : q
value : 1.0
value[1]:
text/*
params:
name : q
value : 0.8
...

等等。

我敢肯定,对于任何知道如何做的人来说,boost::spirit::qi 语法都是微不足道的。

我谦虚地请求您的帮助。

例如,下面是解码 Content-Type header 的代码概要。它仅支持一个 type/subtype 形式的值,后面可以跟任意数量的 <sp> ; <sp> token=token|quoted_string 形式的参数:

/// Parses a Content-Type header value of the form
///
///     type "/" subtype *( ws ";" ws name [ ws "=" ws token-or-quoted ] )
///
/// from the range [first, last) into `ct`.
///
/// The type, the subtype and every parameter name are lower-cased before
/// being stored; parameter values are stored as returned by
/// consume_token_or_quoted(). A parameter without "=" is added with a name
/// and no value.
///
/// @param ct     message that receives the type, subtype and parameters
/// @param first  start of the header value (taken by value and advanced locally)
/// @param last   end of the header value
/// @throws std::runtime_error if a ',' follows the media type (Content-Type
///         carries exactly one value) or if any other unexpected character
///         is found where a ';' parameter separator is required.
template<class Iter>
void parse(ContentType& ct, Iter first, Iter last)
{
    ct.mutable_type()->append(to_lower(consume_token(first, last)));
    consume_lit(first, last, '/');
    ct.mutable_subtype()->append(to_lower(consume_token(first, last)));

    while (first != last) {
        skipwhite(first, last);
        if (consume_char_if(first, last, ';'))
        {
            auto p = ct.add_parameters();
            skipwhite(first, last);
            p->set_name(to_lower(consume_token(first, last)));
            skipwhite(first, last);
            if (consume_char_if(first, last, '='))
            {
                skipwhite(first, last);
                p->set_value(consume_token_or_quoted(first, last));
            }
            // else: the parameter has a name only; its value stays unset.
        }
        else if (consume_char_if(first, last, ','))
        {
            // A ',' would normally introduce the next value, but Content-Type
            // allows a single value only, so this is an error.
            throw std::runtime_error("invalid use of , in Content-Type");
        }
        else if (first != last)
        {
            // Neither ';' nor ',' matched and input remains. Assuming
            // consume_char_if() leaves `first` untouched on a mismatch (as its
            // name suggests), nothing would ever advance the iterator again,
            // so fail instead of looping forever.
            throw std::runtime_error("unexpected character in Content-Type");
        }
    }
}

/// Fills `ct` from the textual Content-Type header value `header_value`
/// by running parse() over the whole string, then hands `ct` back so the
/// call can be chained.
ContentType& populate(ContentType& ct, const std::string& header_value)
{
    const auto value_begin = header_value.begin();
    const auto value_end = header_value.end();
    parse(ct, value_begin, value_end);
    return ct;
}

最佳答案

好的,经过 24 小时的英勇奋斗(好吧,其实不是——更像是一遍又一遍地阅读手册……),我找到了行之有效的方法。

我绝对无法胜任 boost::spirit。如果有人可以改进这个答案,请张贴。

此 spirit 状态机采用 header 的值(带有一个可选参数化值)并将其转换为 content_type 结构。

根据我对 HTTP 标准的业余解读,某些 header 具有以下形式(此处的空格表示任意数量的空格,值可以带引号,也可以不带引号):

Header-Name: tokena/tokenb [; param1 = "value" [; param2 = value]...]

而其他 header 则具有更一般的形式:

Header-Name: token [; param1 = "value"[; param2 = value]...] [ , token ...]

此代码涵盖第一种情况,即 HTTP Content-Type header 的值。我还需要扩展它以支持 Accept header(它可以列出多个带参数的值)——这部分稍后补充。

这是代码。请务必告诉我如何改进它!!

#define BOOST_SPIRIT_DEBUG
#include <gtest/gtest.h>
#include <boost/spirit/include/qi.hpp>
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/qi_char.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_fusion.hpp>
#include <boost/spirit/include/phoenix_stl.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/std_pair.hpp>
#include <utility>
#include <vector>
#include <string>
#include <boost/variant.hpp>

namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;

// A parameter that consists of a bare name only (no "= value" part);
// this is the attribute type of the grammar's `name_only` rule.
using unary_parameter = std::string;

// A "name = value" parameter; this is the attribute type of the grammar's
// `nvp` rule.
struct binary_parameter
{
std::string name;
std::string value;
};
// Adapt binary_parameter as a Fusion sequence (name, value), in declaration
// order, so it can be used as the attribute of a Qi rule.
BOOST_FUSION_ADAPT_STRUCT(binary_parameter,
(std::string, name)
(std::string, value))

// One header parameter: either a bare name or a name/value pair.
using parameter = boost::variant<unary_parameter, binary_parameter>;

// The two halves of a "type/subtype" media type; this is the attribute type
// of the grammar's `type_subtype_rule`.
struct type_subtype
{
std::string type;
std::string subtype;
};
// Adapt type_subtype as a Fusion sequence (type, subtype), in declaration
// order, so it can be used as the attribute of a Qi rule.
BOOST_FUSION_ADAPT_STRUCT(type_subtype,
(std::string, type)
(std::string, subtype))

// NOTE(review): this alias is not referenced anywhere else in the visible
// source — confirm whether it is still needed.
using content_type_pair = std::pair<std::string, std::string>;

// Result of parsing one header value: the media type followed by its
// parameters in the order the grammar matched them. This is the attribute
// type of the whole token_grammar.
struct content_type
{
type_subtype type;
std::vector<parameter> params;
};

// Adapt content_type as a Fusion sequence (type, params), in declaration
// order, so it can be used as the attribute of a Qi rule.
BOOST_FUSION_ADAPT_STRUCT(content_type,
(type_subtype, type)
(std::vector<parameter>, params))

/// Boost.Spirit Qi grammar for a single Content-Type style header value:
///
///     token "/" token *( OWS ";" OWS ( token OWS "=" OWS value | token ) )
///
/// where `value` is either a quoted-string (synthesized without the
/// surrounding quotes and without the backslash of a quoted pair) or a
/// token, and OWS is any run of spaces and horizontal tabs.
///
/// The synthesized attribute is a `content_type`. The grammar uses no
/// skipper, so whitespace is matched explicitly, and it does not require
/// end of input: trailing characters are simply left unconsumed.
///
/// The low-level rule names (CR, LF, LWS, CTL, TEXT, separator, token,
/// quoted_string, ...) follow the "basic rules" BNF of RFC 2616 section 2.2.
/// Several of them (CRLF, LWS, UPALPHA, LOALPHA, ALPHA, DIGIT, TEXT, QUOT,
/// end_sequence) are defined but not used by `content_type_rule`.
template<class Iterator>
struct token_grammar : qi::grammar<Iterator, content_type()>
{

    token_grammar() : token_grammar::base_type(content_type_rule)
    {
        using ascii::char_;
        using qi::omit;
        using qi::eoi;

        // --- basic character classes -----------------------------------
        CR = char_('\r');
        LF = char_('\n');
        CRLF = CR >> LF;
        SP = char_(' ');
        HT = char_('\t');
        LWS = -CRLF >> +(SP | HT);
        // Optional whitespace: HTTP allows horizontal tabs as well as spaces
        // around ';' and '='.
        OWS = *(SP | HT);

        UPALPHA = char_('A', 'Z');
        LOALPHA = char_('a', 'z');
        ALPHA = UPALPHA | LOALPHA;
        DIGIT = char_('0', '9');
        CTL = char_(0, 31) | char_(127);
        QUOT = char_('"');
        TEXT = (char_ - CTL) | HT;

        separator = char_('(') | ')' | '<' | '>' | '@'
                  | ',' | ';' | ':' | '\\' | '"'
                  | '/' | '[' | ']' | '?' | '='
                  | '{' | '}' | SP | HT;

        // NOTE(review): `space` is declared below but never given a
        // definition in this constructor, so `end_sequence` effectively
        // depends only on `separator`. Neither rule is used by
        // `content_type_rule`; confirm the intent before relying on them.
        end_sequence = separator | space;

        // A token is one or more characters that are neither separators nor
        // control characters; without the CTL exclusion a token would
        // swallow CR, LF, NUL, DEL, etc.
        token = +(char_ - separator - CTL);

        // --- quoted strings ----------------------------------------------
        qdtext = char_ - char_('"') - '\\';
        // A backslash escapes the next character; only that character is kept.
        quoted_pair = omit[char_('\\')] >> char_;
        quoted_string = omit[char_('"')] >> *(qdtext | quoted_pair) >> omit[char_('"')];
        value = quoted_string | token ;

        // --- header value ------------------------------------------------
        type_subtype_rule = token >> '/' >> token;
        name_only = token;
        nvp = token >> OWS >> omit['='] >> OWS >> value;
        // `nvp` is tried first; if no '=' follows the name the alternative
        // backtracks and the parameter is matched as a bare name.
        any_parameter = OWS >> omit[char_(';')] >> OWS >> (nvp | name_only);
        content_type_rule = type_subtype_rule >> *any_parameter;

        BOOST_SPIRIT_DEBUG_NODES((qdtext)(quoted_pair)(quoted_string)(value)(token)(separator));
    }

    qi::rule<Iterator, void()> CR, LF, CRLF, SP, HT, LWS, OWS, CTL, QUOT;
    qi::rule<Iterator, char()> UPALPHA, LOALPHA, ALPHA, DIGIT, TEXT, qdtext, quoted_pair;
    qi::rule<Iterator, void()> separator, space, end_sequence;
    qi::rule<Iterator, std::string()> quoted_string, token, value;
    qi::rule<Iterator, type_subtype()> type_subtype_rule;
    qi::rule<Iterator, unary_parameter()> name_only;
    qi::rule<Iterator, binary_parameter()> nvp;
    qi::rule<Iterator, parameter()> any_parameter;
    qi::rule<Iterator, content_type()> content_type_rule;

};

// Exercises token_grammar on a bare media type and on a media type with one
// quoted name/value parameter.
TEST(spirit_test, test1)
{
    token_grammar<std::string::const_iterator> grammar{};

    // Case 1: bare type/subtype followed by trailing whitespace, no parameters.
    std::string test = R"__test(application/json )__test";
    content_type ct;
    // Check the parse result itself: if the grammar fails, the field
    // comparisons below would only report confusing value mismatches.
    const bool bare_ok = qi::parse(test.cbegin(), test.cend(), grammar, ct);
    ASSERT_TRUE(bare_ok);
    EXPECT_EQ("application", ct.type.type);
    EXPECT_EQ("json", ct.type.subtype);
    EXPECT_EQ(0u, ct.params.size());

    // Case 2: one parameter with whitespace around ';' and '=' and a quoted value.
    ct = {};
    test = R"__test(text/html ; charset = "ISO-8859-5")__test";
    const bool param_ok = qi::parse(test.cbegin(), test.cend(), grammar, ct);
    ASSERT_TRUE(param_ok);
    EXPECT_EQ("text", ct.type.type);
    EXPECT_EQ("html", ct.type.subtype);
    // ASSERT (not EXPECT): the boost::get below would throw on a wrong
    // element count or alternative.
    ASSERT_EQ(1u, ct.params.size());
    ASSERT_EQ(typeid(binary_parameter), ct.params[0].type());
    auto& x = boost::get<binary_parameter>(ct.params[0]);
    EXPECT_EQ("charset", x.name);
    // The surrounding quotes are not part of the stored value.
    EXPECT_EQ("ISO-8859-5", x.value);

}

关于c++ - 用boost spirit 完全解码http header value ,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/37840682/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com