gpt4 book ai didi

python - 将逻辑字符串转换为 JSON

转载 作者:行者123 更新时间:2023-11-28 16:56:26 25 4
gpt4 key购买 nike

我想转换自:

输入:

"#serviceRequest and @charges:getRoamingCharges or @plans:dataplans"

其中:'#' 表示意图(intent),'@' 表示实体(entity),':' 表示值(value)。

输出:

{"and":[
{"some" : [ {"var":"intents"}, {"==":[{"var":"intent"},
"serviceRequest"]} ]},
{"or":[
{"and":[{"some" : [ {"var":"entities"}, {"==":[{"var":"entity"},
"charges"]} ]},
{"some" : [ {"var":"entities"}, {"==":[{"var":"value"},
"getRoamingCharges"]} ]}]
},{"and":[
{"some" : [ {"var":"entities"}, {"==":[{"var":"entity"}, "plans"]}
]},
{"some" : [ {"var":"entities"}, {"==":[{"var":"value"},
"dataplans"]} ]}
]}
]}
]}

我尝试过的:

import pyparsing

# Both operands are double-quoted strings.
identifier = pyparsing.QuotedString('"')
value = pyparsing.QuotedString('"')

# Comparison operators; alternatives are tried in the order listed.
operator = pyparsing.MatchFirst([
    pyparsing.Literal("=="),
    pyparsing.Literal("≠"),
    pyparsing.Literal("≥"),
    pyparsing.Literal("≤"),
    pyparsing.Literal("<"),
    pyparsing.Literal(">"),
])

# A comparison is: quoted string, operator, quoted string.
match_format = identifier + operator + value

#print(match_format.parseString('"foobar"=="123"'))
def list_to_dict(pos, tokens):
    """Print ``tokens`` and a mapping derived from them, then return ``tokens``.

    The mapping is keyed by ``tokens[1]`` and its value is the *set* holding
    ``tokens[2]`` and ``tokens[0]``.  ``pos`` is accepted but not used.
    """
    print(tokens)
    first, key, last = tokens[0], tokens[1], tokens[2]
    # Curly braces without a colon build a set, so operand order is not kept.
    mapping = {key: {last, first}}
    print(mapping)
    # This wrapper is assembled but never returned or printed.
    dic = {'bfeh': [mapping]}
    return tokens


# Rebuild the three-part expression and hook list_to_dict in as its parse action.
match_format = identifier + operator + value
match_format.setParseAction(list_to_dict)

# Try the grammar on one sample comparison and show the parse result.
print(match_format.parseString('"intent"=="serviceRequest"'))

给出:

{'==': {'intent', 'serviceRequest'}}

请帮我用解析库(例如 Python 的 pyparsing)或其他任何可行的方法来实现这一转换。

最佳答案

您可以先编写一个简单的分词器,再把它的输出交给解析器处理:

import re
class Token:
    """One lexical unit of a rule string such as ``#intent and @entity:value``.

    Each token carries its text (``val``) and a category (``_type``), which is
    one of ``cond``, ``intent``, ``entity``, ``value`` or ``label``.
    """

    # The \b anchors make 'and'/'or' match only as whole words, so labels such
    # as 'order' or 'android' are not split into an operator plus a remainder.
    grammar = r'\band\b|\bor\b|#|:|@|\w+'
    # Ordered (pattern, category) pairs; the first pattern that matches the
    # entire token text decides its category.
    _types = [
        ('and', 'cond'),
        ('or', 'cond'),
        ('#', 'intent'),
        ('@', 'entity'),
        (':', 'value'),
        (r'\w+', 'label'),
    ]

    def __init__(self, val: str, _type: str) -> None:
        """Store the token text ``val`` and its category ``_type``."""
        self.val, self._type = val, _type

    @property
    def is_cond(self) -> bool:
        """True for the boolean operators ``and`` / ``or``."""
        return self._type == 'cond'

    @property
    def is_desc(self) -> bool:
        """True for the descriptor symbols ``#``, ``@`` and ``:``."""
        return self._type in {'intent', 'entity', 'value'}

    @property
    def var_name(self) -> str:
        """Collection name for this token: ``'intents'`` for an intent, else ``'entities'``."""
        return f'{self._type}s' if self._type == 'intent' else 'entities'

    @classmethod
    def tokenize(cls, _input: str) -> list:
        """Split ``_input`` into a list of ``Token`` objects.

        Characters that match no grammar alternative (spaces, punctuation
        other than ``#``, ``@`` and ``:``) are skipped.
        """
        tokens = []
        for text in re.findall(cls.grammar, _input):
            # fullmatch, not a substring search: a label that merely contains
            # 'and' or 'or' (e.g. 'color', 'brand') must stay a label.
            kind = next(
                category
                for pattern, category in cls._types
                if re.fullmatch(pattern, text)
            )
            tokens.append(cls(text, kind))
        return tokens

    def __repr__(self) -> str:
        return f'{self.__class__.__name__}(value={self.val}, type={self._type})'

现在,可以创建一个简单的解析器:

from itertools import groupby
class AST:
    """Turn grouped rule tokens into a nested ``and`` / ``or`` / ``some`` dict.

    The stream holds ``(is_cond, tokens)`` pairs: operand groups (descriptor
    and label tokens) alternating with operator groups (``cond`` tokens).
    Operators are folded to the right, so ``A and B or C`` becomes
    ``{'and': [A, {'or': [B, C]}]}``.
    """

    def __init__(self, stream):
        """Wrap ``stream``, an iterable of ``(is_cond, tokens)`` pairs."""
        self.stream = iter(stream)

    @staticmethod
    def _clause(desc, name):
        """Return the ``[collection, test]`` operands of one ``some`` check."""
        if desc is None:
            # No descriptor is pending, so the label is compared as a value.
            return [{'var': 'entities'}, {'==': [{'var': 'value'}, name]}]
        # The descriptor token, not the trailing label, picks the collection:
        # an intent iterates 'intents', everything else 'entities'.
        return [{'var': desc.var_name}, {'==': [{'var': desc._type}, name]}]

    def p_parse(self, stream):
        """Convert one operand group into a ``some`` check or an ``and`` of them.

        Args:
            stream: the tokens of a single operand, e.g. those of
                ``@charges:getRoamingCharges``.

        Returns:
            ``{'some': [...]}`` for a single label, otherwise
            ``{'and': [{'some': [...]}, ...]}`` with one entry per label.

        Raises:
            ValueError: if the group contains no label at all.
        """
        clauses, desc, name = [], None, ''
        for tok in stream:
            if tok._type == 'value':
                # ':' closes the pending label; the next label is a value.
                if name:
                    clauses.append(self._clause(desc, name))
                desc, name = None, ''
            elif tok.is_desc:
                desc = tok
            else:
                name = tok.val
        if name:
            clauses.append(self._clause(desc, name))
        if not clauses:
            raise ValueError('operand group contains no label')
        if len(clauses) == 1:
            return {'some': clauses[0]}
        return {'and': [{'some': clause} for clause in clauses]}

    def parse(self, seen=None):
        """Consume the rest of the stream and return the combined expression.

        Args:
            seen: the already-parsed left operand, if any.

        Returns:
            The nested dict, or ``seen`` (``None`` for an empty stream) once
            the stream is exhausted.

        Raises:
            ValueError: if an operator lacks an operand on either side, or if
                two operators appear with no operand between them.
        """
        is_cond, tokens = next(self.stream, (None, None))
        if is_cond is None:
            return seen
        if not is_cond:
            return self.parse(self.p_parse(tokens))
        if len(tokens) > 1:
            # groupby merged adjacent operators into one group.
            raise ValueError(
                f'operators {tokens[0].val!r} and {tokens[1].val!r} '
                'have no operand between them'
            )
        right = self.parse()
        if seen is None or right is None:
            raise ValueError(f'operator {tokens[0].val!r} is missing an operand')
        return {tokens[0].val: [seen, right]}

    @classmethod
    def _group(cls, _tokens):
        """Build an ``AST`` from a flat token list.

        Consecutive tokens sharing the same ``is_cond`` flag are bundled, so
        the stream alternates between operand groups and operator groups.
        """
        return cls([
            (flag, list(run))
            for flag, run in groupby(_tokens, key=lambda tok: tok.is_cond)
        ])

现在,组合组件:

# Sample rule: '#' marks an intent, '@' an entity and ':' a value.
s = "#serviceRequest and @charges:getRoamingCharges or @plans:dataplans"
# Tokenize first, then group the tokens and fold them into one nested dict.
tokens = Token.tokenize(s)
result = AST._group(tokens).parse()

输出:

{'and': [{'some': [{'var': 'entities'}, {'==': [{'var': 'intent'}, 'serviceRequest']}]}, {'or': [{'and': [{'some': [{'var': 'entities'}, {'==': [{'var': 'entity'}, 'charges']}]}, {'some': [{'var': 'entities'}, {'==': [{'var': 'value'}, 'getRoamingCharges']}]}]}, {'and': [{'some': [{'var': 'entities'}, {'==': [{'var': 'entity'}, 'plans']}]}, {'some': [{'var': 'entities'}, {'==': [{'var': 'value'}, 'dataplans']}]}]}]}]}

毫无疑问,这个问题还有更简短的解法;不过,把分词器和解析器分开编写,是为了让您今后更容易扩展这个方案,以处理那些更“取巧”(hackish)的写法无法应对的输入。

关于python - 将逻辑字符串转换为 JSON,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/57901639/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com