gpt4 book ai didi

python - 比较字典列表并合并到一个python

转载 作者:行者123 更新时间:2023-12-01 07:56:27 25 4
gpt4 key购买 nike

我正在编写一个 Python 程序,把相似的字典合并为一个。我有一个列表 data,其中包含两个字典列表;还有一个空的 filter_cache 字典和一个 input_completed = False 变量。我用 for 循环依次取出每个字典列表:第一个列表被处理后,其中相似的内容会合并到一个字典中,然后我把结果赋给 filter_cache 字典;接着 for 循环取出下一个列表,同样进行处理并合并。之后,由于 data 已经取完,我会把 input_completed 改为 True。接下来,我想把 filter_cache 中上一次的结果与当前的结果进行比较,再用某个函数把它们合并起来,并重新赋给 filter_cache。该如何实现?

代码如下:

from itertools import groupby

# Key whose value decides which dictionaries belong to the same group.
field_to_be_check = "state"
# Source fields to collect per group and the output key used for each one;
# the two lists are matched by position.
merger = ["city", "haps"]
merge_name = ["cities", "my_haps"]

# Two batches of records; the loop below consumes one batch per iteration.
data = [
    [
        {'haps': 'hap0', 'state': 'tamil nadu', 'ads': 'ad1', 'city': 'tenkasi'},
        {'haps': 'hap0', 'state': 'tamil nadu', 'ads': 'ad4', 'city': 'nagerkoil'},
        {'haps': 'hap0', 'state': 'tamil nadu', 'ads': 'ad1', 'city': 'tuticorin'},
        {'haps': 'hap1', 'state': 'kerala', 'ads': 'ad2', 'city': 'kolikodu'},
        {'haps': 'hap1', 'state': 'kerala', 'ads': 'ad2', 'city': 'kottayam'},
        {'haps': 'hap1', 'state': 'kerala', 'ads': 'ad2', 'city': 'idukki'},
        {'haps': 'hap2', 'state': 'mumbai', 'ads': 'ad3', 'city': 'Akola'},
        {'haps': 'hap2', 'state': 'mumbai', 'ads': 'ad3', 'city': 'Washim'},
        {'haps': 'hap2', 'state': 'mumbai', 'ads': 'ad3', 'city': 'Jalna'},
        {'haps': 'hap2', 'state': 'mumbai', 'ads': 'ad3', 'city': 'Latur'},
    ],
    [
        {'haps': 'hap1', 'state': 'tamil nadu', 'ads': 'ad1', 'city': 'madurai'},
        {'haps': 'hap0', 'state': 'tamil nadu', 'ads': 'ad1', 'city': 'chennai'},
        {'haps': 'hap1', 'state': 'kerala', 'ads': 'ad2', 'city': 'palakad'},
        {'haps': 'hap1', 'state': 'kerala', 'ads': 'ad2', 'city': 'guruvayor'},
        {'haps': 'hap2', 'state': 'mumbai', 'ads': 'ad3', 'city': 'Nanded'},
    ],
]


def process_group(group, merger_item):
    """Return the distinct ``merger_item`` values found in ``group``.

    Each distinct value is wrapped as ``{merger_item: value}``.  Values are
    de-duplicated through a set, so the order of the returned list is not
    guaranteed to follow the input order.
    """
    distinct_values = {item[merger_item] for item in group}
    return [{merger_item: value} for value in distinct_values]


input_completed = False
filter_cache = {}
for datas in data:
    # Group on the value of field_to_be_check only.  groupby merges only
    # *consecutive* items with equal keys, so each batch is expected to be
    # ordered by that field already.
    grp = groupby(datas, key=lambda x: x[field_to_be_check])
    result = []

    # Build one summary dictionary per group.
    for model, group in grp:
        cities_dict = {field_to_be_check: model}

        # The group iterator can be consumed only once; materialise it so
        # every merger field can scan the same items.
        group_list = list(group)

        for name, output_key in zip(merger, merge_name):
            cities_dict[output_key] = process_group(group_list, name)

        result.append(cities_dict)

    # NOTE: this replaces the 'aggregate' entry on every batch, so after the
    # loop only the last batch's groups remain -- batches are not merged with
    # each other.  That is the behaviour the question is asking about.
    filter_cache.update({'aggregate': result})
    print(filter_cache)


final_result = filter_cache

我得到的输出:

{'aggregate': [{'state': 'tamil nadu', 'my_haps': [{'haps': 'hap0'}], 'cities': [{'city': 'tuticorin'}, {'city': 'tenkasi'}, {'city': 'nagerkoil'}]}, {'state': 'kerala', 'my_haps': [{'haps': 'hap1'}], 'cities': [{'city': 'idukki'}, {'city': 'kolikodu'}, {'city': 'kottayam'}]}, {'state': 'mumbai', 'my_haps': [{'haps': 'hap2'}], 'cities': [{'city': 'Akola'}, {'city': 'Jalna'}, {'city': 'Latur'}, {'city': 'Washim'}]}]}
{'aggregate': [{'state': 'tamil nadu', 'my_haps': [{'haps': 'hap1'}, {'haps': 'hap0'}], 'cities': [{'city': 'madurai'}, {'city': 'chennai'}]}, {'state': 'kerala', 'my_haps': [{'haps': 'hap1'}], 'cities': [{'city': 'palakad'}, {'city': 'guruvayor'}]}, {'state': 'mumbai', 'my_haps': [{'haps': 'hap2'}], 'cities': [{'city': 'Nanded'}]}]}

所需输出:

{'aggregate': [{'state': 'tamil nadu', 'my_haps': [{'haps': 'hap0'},{'haps': 'hap1'}], 'cities': [{'city': 'tuticorin'}, {'city': 'tenkasi'}, {'city': 'nagerkoil'},{'city': 'madurai'}, {'city': 'chennai'}]}, {'state': 'kerala', 'my_haps': [{'haps': 'hap1'}], 'cities': [{'city': 'idukki'}, {'city': 'kolikodu'}, {'city': 'kottayam'},{'city': 'palakad'}, {'city': 'guruvayor'}]}, {'state': 'mumbai', 'my_haps': [{'haps': 'hap2'}], 'cities': [{'city': 'Akola'}, {'city': 'Jalna'}, {'city': 'Latur'}, {'city': 'Washim'},{'city': 'Nanded'}]}]}

最佳答案

函数:

def group_dicts_from_list(lst, group_by, merge_rules, result):
    """Merge dictionaries from ``lst`` into ``result["aggregate"]``, in place.

    Dictionaries sharing the same ``group_by`` value are folded into a single
    entry of ``result["aggregate"]``.  For every ``src -> dst`` pair in
    ``merge_rules``, the distinct ``{src: value}`` items seen for a group are
    collected, in first-seen order, under that group's ``dst`` key.

    Args:
        lst: A list of dictionaries; nested lists are processed recursively.
            Elements that are neither dicts nor lists, and dicts without the
            ``group_by`` key, are skipped.
        group_by: Key whose value identifies the group of a dictionary.
        merge_rules: Mapping of source field name to output list key.
        result: Dictionary that accumulates the output.  Groups already
            present under ``result["aggregate"]`` are extended, so the
            function can be called repeatedly with new batches.

    Returns:
        None.  ``result`` is modified in place.  If any argument is empty or
        of the wrong type, the function returns without touching ``result``.
    """
    if not lst or not isinstance(lst, list) or not group_by:
        return
    if not merge_rules or not isinstance(merge_rules, dict):
        return
    if not isinstance(result, dict):
        return

    aggregate = result.setdefault("aggregate", [])
    for item in lst:
        if isinstance(item, list):
            # Nested batch: recurse so its items land in the same aggregate.
            group_dicts_from_list(item, group_by, merge_rules, result)
            continue
        if not isinstance(item, dict) or group_by not in item:
            continue

        group_value = item[group_by]
        # Linear search with == keeps unhashable group values usable.
        group = next(
            (entry for entry in aggregate if entry[group_by] == group_value),
            None,
        )
        if group is None:
            group = {group_by: group_value}
            aggregate.append(group)

        for src, dst in merge_rules.items():
            if src not in item:
                continue
            merged_item = {src: item[src]}
            bucket = group.setdefault(dst, [])
            if merged_item not in bucket:
                bucket.append(merged_item)

用法:

# Records are grouped by the value stored under this key.
field_to_be_check = "state"
# Source field -> key of the list that collects that field's distinct values.
my_merge_rules = {"city": "cities", "haps": "my_haps"}
# Two batches of records, passed to the grouping function as one nested list.
data = [
    [
        {'haps': 'hap0', 'state': 'tamil nadu', 'ads': 'ad1', 'city': 'tenkasi'},
        {'haps': 'hap0', 'state': 'tamil nadu', 'ads': 'ad4', 'city': 'nagerkoil'},
        {'haps': 'hap0', 'state': 'tamil nadu', 'ads': 'ad1', 'city': 'tuticorin'},
        {'haps': 'hap1', 'state': 'kerala', 'ads': 'ad2', 'city': 'kolikodu'},
        {'haps': 'hap1', 'state': 'kerala', 'ads': 'ad2', 'city': 'kottayam'},
        {'haps': 'hap1', 'state': 'kerala', 'ads': 'ad2', 'city': 'idukki'},
        {'haps': 'hap2', 'state': 'mumbai', 'ads': 'ad3', 'city': 'Akola'},
        {'haps': 'hap2', 'state': 'mumbai', 'ads': 'ad3', 'city': 'Washim'},
        {'haps': 'hap2', 'state': 'mumbai', 'ads': 'ad3', 'city': 'Jalna'},
        {'haps': 'hap2', 'state': 'mumbai', 'ads': 'ad3', 'city': 'Latur'},
    ],
    [
        {'haps': 'hap1', 'state': 'tamil nadu', 'ads': 'ad1', 'city': 'madurai'},
        {'haps': 'hap0', 'state': 'tamil nadu', 'ads': 'ad1', 'city': 'chennai'},
        {'haps': 'hap1', 'state': 'kerala', 'ads': 'ad2', 'city': 'palakad'},
        {'haps': 'hap1', 'state': 'kerala', 'ads': 'ad2', 'city': 'guruvayor'},
        {'haps': 'hap2', 'state': 'mumbai', 'ads': 'ad3', 'city': 'Nanded'},
    ],
]

# The function writes its output into this dictionary rather than returning it.
result = {}
group_dicts_from_list(data, field_to_be_check, my_merge_rules, result)

print(result)

输出:

{'aggregate': [{'state': 'tamil nadu', 'cities': [{'city': 'tenkasi'}, {'city': 'nagerkoil'}, {'city': 'tuticorin'}, {'city': 'madurai'}, {'city': 'chennai'}], 'my_haps': [{'haps': 'hap0'}, {'haps': 'hap1'}]}, {'state': 'kerala', 'cities': [{'city': 'kolikodu'}, {'city': 'kottayam'}, {'city': 'idukki'}, {'city': 'palakad'}, {'city': 'guruvayor'}], 'my_haps': [{'haps': 'hap1'}]}, {'state': 'mumbai', 'cities': [{'city': 'Akola'}, {'city': 'Washim'}, {'city': 'Jalna'}, {'city': 'Latur'}, {'city': 'Nanded'}], 'my_haps': [{'haps': 'hap2'}]}]}

评论:

我已经把下面的两个列表替换成了其后的 my_merge_rules 字典:

# Before: two parallel lists, paired by position.
merger = ["city", "haps"]
merge_name = ["cities", "my_haps"]

# After: a single mapping from each source field to its output key.
my_merge_rules = {
"city": "cities",
"haps": "my_haps",
}

因为我认为这样更可靠(可以避免两个列表长度不一致的情况)。

关于python - 比较字典列表并合并到一个python,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/55953177/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com