gpt4 book ai didi

python - 使用 Python 比较两个文件夹中的文件

转载 作者:行者123 更新时间:2023-11-30 23:15:14 24 4
gpt4 key购买 nike

我正在尝试比较两个文件夹“test1”和“test2”中的所有文件对(具有相同文件名的文件),并打印它们可能存在的任何差异。我下面有这段代码,部分工作正常。它获取具有相同文件名的文件,但仅比较第一个文件对,而不比较文件夹中的所有文件。我如何解决它?示例 csv 文件可能如下所示

import os
from collections import defaultdict
import csv

def relative_files(path):
for root, dirnames, files in os.walk(path):
relroot = os.path.relpath(root, path)
for filename in files:
yield os.path.join(relroot, filename)

def difference_in_files (root_one, root_two):
files_one = set(relative_files(root_one))
for same in files_one.intersection(relative_files(root_two)):
try:
with open(os.path.join(root_one, same)) as csvfile, open(os.path.join(root_two, same)) as csvfile2:
d = defaultdict(list)
d2 = defaultdict(list)
header = next(csvfile).rstrip().split("\t")
h1 = next((i for i, x in enumerate(header) if x == "h1"),None)
h2 = next((i for i, x in enumerate(header) if x == "h2"),None)
header2 = next(csvfile2).rstrip().split("\t")
h12 = next((i for i, x in enumerate(header2) if x == "h1"),None)
h22 = next((i for i, x in enumerate(header2) if x == "h2"),None)
if h1 is not None and h2 is not None:
r = csv.reader(csvfile,delimiter="\t")
for row in r:
d[row[h1]].append(row[h2])
if h12 is not None and h22 is not None:
r = csv.reader(csvfile2,delimiter="\t")
for row in r:
d2[row[h12]].append(row[h22])
d2 = {k: list(set(v)) for k,v in dict(d2).items()}
d = {k: list(set(v)) for k,v in dict(d).items()}

diff = dict([ (key, d2.get(key, d.get(key))) for key in set(d.keys()+d2.keys()) if (key in d and (not key in d2 or d2[key] != d[key])) or (key in d2 and (not key in d or d[key] != d2[key])) ])
diff2 = dict([ (key, d.get(key, d2.get(key))) for key in set(d2.keys()+d.keys()) if (key in d2 and (not key in d or d[key] != d2[key])) or (key in d and (not key in d2 or d2[key] != d[key])) ])
return diff, diff2
except TypeError:
pass


if __name__ == '__main__':
root_one = 'test1'
root_two = 'test2'
difference_in_files (root_one, root_two)

test1/csv1.csv

h1,h2,h3
aa,90,io
bb,86,0n

测试1.csv2.csv

h1,h8,h2
jj,kj,64
df,hj,12

test2/csv1.csv

h1,h2,h3
aa,90,io
bb,66,0n

test2.csv2.csv

h1,h8,h2
jj,kj,64
df,hj,12
mm,h9,09

它仅比较两个文件中的 csv1,而不比较 csv2。

最佳答案

所以只是做出正式答复。问题是:

return diff, diff2 is in the for loop. It will be executed at the end of the first iteration of the loop. Thus no other iterations will be executed.

关于python - 使用 Python 比较两个文件夹中的文件,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/28356302/

24 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com