
python - Merging multiple DBF (csv) files into one, appending along columns


I have looked at many csv file merging questions, but with no luck. Here is my current script. The result is that either only one data file gets written, or the files are appended as rows rather than as columns. I added \n to try to avoid the row problem.

import os

yolo = []
location = os.listdir("C:\Users\jcm\Desktop\RO")
for filename in location:
    #print filename
    if "(Wide)" in filename:
        yolo.append(filename)
total = len(yolo)

with open("out4.dbf", "a") as f:
    for num in yolo:
        for line in open("C:\Users\jcm\Desktop\RO\\" + num, "rb"):
            f.write(line + "\n")
            print line + "\n"

The print line+"\n" statement prints all the data, but the data never makes it into the appended file. The files are in DBF database format. I have found ways to convert DBF to csv, but nothing that merges thousands of data points. The first row is the field names, the second row is the data types, and the third row onward is the data.
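
For plain CSV input, a column-wise merge means pairing up the rows of every file and writing them side by side, not appending whole files one after another. A minimal sketch with the standard csv module, assuming the files have already been converted to CSV and all have the same number of rows (zip() stops at the shortest file):

import csv
from glob import glob

tables = []
for name in sorted(glob(r"C:\Users\jcm\Desktop\RO\*(Wide)*.csv")):
    with open(name, "rb") as f:            # binary mode for csv on Python 2
        tables.append(list(csv.reader(f)))

with open("out4.csv", "wb") as out:
    writer = csv.writer(out)
    for rows in zip(*tables):              # one row from each file at a time
        merged = []
        for row in rows:
            merged.extend(row)             # side by side, i.e. along columns
        writer.writerow(merged)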

Here is the code I use to read and filter the DBF files. I have added my own code at the bottom of it.

import struct, datetime, decimal, itertools

def dbfreader(f):
    """Returns an iterator over records in a Xbase DBF file.

    The first row returned contains the field names.
    The second row contains field specs: (type, size, decimal places).
    Subsequent rows contain the data records.
    If a record is marked as deleted, it is skipped.

    File should be opened for binary reads.

    """
    # See DBF format spec at:
    # http://www.pgts.com.au/download/public/xbase.htm#DBF_STRUCT

    numrec, lenheader = struct.unpack('<xxxxLH22x', f.read(32))
    numfields = (lenheader - 33) // 32

    fields = []
    for fieldno in xrange(numfields):
        name, typ, size, deci = struct.unpack('<11sc4xBB14x', f.read(32))
        name = name.replace('\0', '')       # eliminate NULs from string
        fields.append((name, typ, size, deci))
    yield [field[0] for field in fields]
    yield [tuple(field[1:]) for field in fields]

    terminator = f.read(1)
    assert terminator == '\r'

    fields.insert(0, ('DeletionFlag', 'C', 1, 0))
    fmt = ''.join(['%ds' % fieldinfo[2] for fieldinfo in fields])
    fmtsiz = struct.calcsize(fmt)
    for i in xrange(numrec):
        record = struct.unpack(fmt, f.read(fmtsiz))
        if record[0] != ' ':
            continue                        # deleted record
        result = []
        for (name, typ, size, deci), value in itertools.izip(fields, record):
            if name == 'DeletionFlag':
                continue
            if typ == "N":
                value = value.replace('\0', '').lstrip()
                value = value.replace(' ', '').lstrip()
                if value == '':
                    value = 0
                elif deci:
                    value = decimal.Decimal(value)
                else:
                    value = int(value)
            elif typ == 'DATE':
                y, m, d = int(value[:4]), int(value[4:6]), int(value[6:8])
                value = datetime.date(y, m, d)
            elif typ == 'L':
                value = (value in 'YyTt' and 'T') or (value in 'NnFf' and 'F') or '?'
            elif typ == 'F':
                value = float(value)
            result.append(value)
        yield result
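
# A toy illustration of the 32-byte header parsed at the top of dbfreader
# (the values here are made up for the example): version byte, Y/M/D bytes,
# record count as uint32, header length as uint16, then 22 bytes that the
# '<xxxxLH22x' format skips.
_toy_header = struct.pack('<BBBBLH22x', 3, 113, 5, 31, 2, 97)
_numrec, _lenheader = struct.unpack('<xxxxLH22x', _toy_header)
assert _numrec == 2 and (_lenheader - 33) // 32 == 2  # 2 records, 2 fields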
yoloies = []
yolo = []
yolos = []
def dbfwriter(f, fieldnames, fieldspecs, records):
    """ Return a string suitable for writing directly to a binary dbf file.

    File f should be open for writing in a binary mode.

    Fieldnames should be no longer than ten characters and not include \x00.
    Fieldspecs are in the form (type, size, deci) where
        type is one of:
            C for ascii character data
            M for ascii character memo data (real memo fields not supported)
            D for datetime objects
            N for ints or decimal objects
            L for logical values 'T', 'F', or '?'
        size is the field width
        deci is the number of decimal places in the provided decimal object
    Records can be an iterable over the records (sequences of field values).

    """
    # header info
    ver = 3
    now = datetime.datetime.now()
    yr, mon, day = now.year - 1900, now.month, now.day
    numrec = len(records)
    numfields = len(fieldspecs)
    lenheader = numfields * 32 + 33
    lenrecord = sum(field[1] for field in fieldspecs) + 1
    hdr = struct.pack('<BBBBLHH20x', ver, yr, mon, day, numrec, lenheader, lenrecord)
    f.write(hdr)

    # field specs
    for name, (typ, size, deci) in itertools.izip(fieldnames, fieldspecs):
        name = name.ljust(11, '\x00')
        fld = struct.pack('<11sc4xBB14x', name, typ, size, deci)
        f.write(fld)

    # terminator
    f.write('\r')

    # records
    for record in records:
        f.write(' ')                        # deletion flag
        for (typ, size, deci), value in itertools.izip(fieldspecs, record):
            if typ == "N":
                value = str(value).rjust(size, ' ')
            elif typ == 'DATE':
                value = value.strftime('%Y%m%d')
            elif typ == 'L':
                value = str(value)[0].upper()
            else:
                value = str(value)[:size].ljust(size, ' ')
            #assert len(value) == size
            f.write(value)
        yoloies.append(record[1])
        yolo.append(int(float(record[6])))
        yolos.append(int(record[0]))

    # End of file
    f.write('\x1A')
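
# For reference, the fieldspecs handled above look like the second row of
# the data sample below: e.g. ('D', 8, 0) is an 8-byte date, ('N', 3, 0)
# a 3-digit integer, and ('F', 17, 8) a 17-character float with 8 decimal
# places.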


# -------------------------------------------------------
# Example calls
if __name__ == '__main__':
    import sys, csv
    from cStringIO import StringIO
    from operator import itemgetter

    # Read a database
    filename = 'out.dbf'
    if len(sys.argv) == 2:
        filename = sys.argv[1]
    f = open(filename, 'rb')
    db = list(dbfreader(f))
    f.close()
    for record in db:
        print record
    fieldnames, fieldspecs, records = db[0], db[1], db[2:]

    # Alter the database
    #del records[0]
    #records.sort(key=itemgetter(4))

    # Remove a field
    #del fieldnames[3]
    #del fieldspecs[3]
    #records = [rec[1:] for rec in records]

    # Create a new DBF
    f = StringIO()
    dbfwriter(f, fieldnames, fieldspecs, records)

    # Read the data back from the new DBF
    print '-' * 20
    f.seek(0)
    for line in dbfreader(f):
        print line
    f.close()

    # Convert to CSV
    print '.' * 20
    f = StringIO()
    csv.writer(f).writerow(fieldnames)
    csv.writer(f).writerows(records)
    print "break"
    #filename = 'MyDBF.cvs'
    #g = open(filename, 'w')
    #g.write(f.getvalue())
    print f.getvalue()
    f.close()
    print "break"

    # jcm code
    intensities = zip(yolos, yoloies, yolo)
    print intensities

    with open("yolosss.csv", "w") as out_file:
        for date, time, intensity in zip(yolos, yoloies, yolo):
            out_file.write("%i,%s,%i\n" % (date, time, intensity))
    print "done"

Data sample.

['Date', 'Time', 'Millitm', 'Marker', '0', 'Sts_00', '1', 'Sts_01', '2', 'Sts_02', '3', 'Sts_03', '4', 'Sts_04', '5', 'Sts_05', '6', 'Sts_06', '7', 'Sts_07', '8', 'Sts_08', '9', 'Sts_09', '10', 'Sts_10', '11', 'Sts_11', '12', 'Sts_12']
[('D', 8, 0), ('C', 8, 0), ('N', 3, 0), ('C', 1, 0), ('F', 17, 8), ('C', 1, 0), ('F', 17, 8), ('C', 1, 0), ('F', 17, 8), ('C', 1, 0), ('F', 1, 0), ('C', 1, 0), ('F', 1, 0), ('C', 1, 0), ('F', 17, 8), ('C', 1, 0), ('F', 17, 8), ('C', 1, 0), ('F', 17, 8), ('C', 1, 0), ('F', 17, 8), ('C', 1, 0), ('F', 17, 8), ('C', 1, 0), ('F', 17, 8), ('C', 1, 0), ('F', 17, 8), ('C', 1, 0), ('F', 17, 8), ('C', 1, 0)]
['20130531', '00:00:28', 977, 'B', 548.84643555, ' ', 0.0, ' ', 83.11103058, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 6.93495178, ' ', 7.26524115, ' ', 0.0, ' ', 231.49482727, ' ', 16.30217171, ' ', 12.48698235, ' ']
['20130531', '00:05:13', 577, ' ', 571.55731201, ' ', 0.0, ' ', 81.2725296, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 6.93943787, ' ', 7.26580191, ' ', 0.0, ' ', 230.02883911, ' ', 16.31218529, ' ', 12.94760895, ' ']
['20130531', '00:20:13', 701, ' ', 547.88513184, ' ', 0.0, ' ', 82.43811798, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 6.9360733, ' ', 7.27926016, ' ', 0.0, ' ', 230.24513245, ' ', 16.202034, ' ', 11.91620636, ' ']
['20130531', '00:35:13', 885, ' ', 613.19396973, ' ', 0.0, ' ', 81.4948349, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 6.9276619, ' ', 7.29215765, ' ', 0.0, ' ', 230.04084778, ' ', 16.14195251, ' ', 11.29536152, ' ']
['20130531', '00:50:14', 60, ' ', 466.35424805, ' ', 0.0, ' ', 79.93270874, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 6.92541885, ' ', 7.3028121, ' ', 0.0, ' ', 230.50949097, ' ', 16.02178955, ' ', 10.67451763, ' ']
['20130531', '01:05:14', 174, ' ', 449.89187622, ' ', 0.0, ' ', 83.67579651, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 6.92373657, ' ', 7.31290579, ' ', 0.0, ' ', 233.10502625, ' ', 15.83153057, ' ', 10.29399967, ' ']
['20130531', '01:20:15', 19, ' ', 444.96517944, ' ', 0.0, ' ', 76.59817505, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 6.92429733, ' ', 7.31010199, ' ', 0.0, ' ', 229.95674133, ' ', 15.71136761, ' ', 10.48425865, ' ']
['20130531', '01:35:15', 94, ' ', 428.08221436, ' ', 0.0, ' ', 83.30929565, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 6.92373657, ' ', 7.29944754, ' ', 0.0, ' ', 232.22782898, ' ', 15.61123085, ' ', 11.28534794, ' ']
['20130531', '01:50:15', 238, ' ', 485.58041382, ' ', 0.0, ' ', 81.09828949, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 6.93831635, ' ', 7.30785894, ' ', 0.0, ' ', 229.75245667, ' ', 15.39093208, ' ', 10.68453121, ' ']
['20130531', '02:05:15', 382, ' ', 437.81542969, ' ', 0.0, ' ', 82.19178009, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 6.93102646, ' ', 7.3213172, ' ', 0.0, ' ', 232.1557312, ' ', 15.23071384, ' ', 9.82335949, ' ']
['20130531', '02:20:15', 687, ' ', 412.64120483, ' ', 0.0, ' ', 84.45085144, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 6.91925049, ' ', 7.33533621, ' ', 0.0, ' ', 231.02618408, ' ', 15.11054993, ' ', 9.17247486, ' ']
['20130531', '02:35:15', 841, ' ', 383.62173462, ' ', 0.0, ' ', 81.18240356, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 6.9119606, ' ', 7.34318686, ' ', 0.0, ' ', 231.53088379, ' ', 15.01041412, ' ', 8.6918211, ' ']
['20130531', '02:50:15', 986, ' ', 404.04953003, ' ', 0.0, ' ', 79.95674133, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 6.90186691, ' ', 7.33982229, ' ', 0.0, ' ', 230.24513245, ' ', 14.82015514, ' ', 8.9221344, ' ']
['20130531', '03:05:16', 120, ' ', 392.8142395, ' ', 0.0, ' ', 81.9334259, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 6.90298843, ' ', 7.32019567, ' ', 0.0, ' ', 230.22109985, ' ', 14.53977394, ' ', 9.99359131, ' ']
['20130531', '03:20:16', 515, ' ', 494.0519104, ' ', 0.0, ' ', 82.09564972, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 6.91812897, ' ', 7.30056906, ' ', 0.0, ' ', 233.12905884, ' ', 14.29944706, ' ', 10.74461269, ' ']
['20130531', '03:35:18', 381, ' ', 517.84429932, ' ', 0.0, ' ', 82.45013428, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 6.92541885, ' ', 7.28991461, ' ', 0.0, ' ', 232.68444824, ' ', 14.11920166, ' ', 10.87478924, ' ']
['20130531', '03:50:18', 946, ' ', 488.16390991, ' ', 0.0, ' ', 81.68108368, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 6.93214798, ' ', 7.28486776, ' ', 0.0, ' ', 231.71112061, ' ', 13.8688612, ' ', 11.23528004, ' ']
['20130531', '04:05:19', 141, ' ', 508.65179443, ' ', 0.0, ' ', 80.45542145, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 6.93214798, ' ', 7.28542852, ' ', 0.0, ' ', 229.87261963, ' ', 13.66858959, ' ', 11.29536152, ' ']
['20130531', '04:20:19', 435, ' ', 553.83325195, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 6.89233398, ' ', 7.27701712, ' ', 0.0, ' ', 0.04806537, ' ', 14.34951496, ' ', 24.47328377, ' ']
['20130531', '04:21:56', 224, 'E', 542.41772461, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 6.87607145, ' ', 7.27197027, ' ', 0.0, ' ', 0.04806537, ' ', 14.98037338, ' ', 24.40318871, ' ']
['20130531', '05:17:14', 780, 'B', 0.0, 'U', 0.0, 'U', 0.0, 'U', 0.0, 'U', 0.0, 'U', 0.0, 'U', 0.0, 'U', 0.0, 'U', 0.0, 'U', 0.0, 'U', 0.0, 'U', 0.0, 'U', 0.0, 'U']
['20130531', '05:32:15', 505, ' ', 554.61431885, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 6.8906517, ' ', 7.24337101, ' ', 0.0, ' ', 0.04806537, ' ', 86.66786957, ' ', 12.76736355, ' ']
['20130531', '05:47:15', 669, ' ', 546.20288086, ' ', 0.0, ' ', 0.01201634, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 6.89345551, ' ', 7.24337101, ' ', 0.0, ' ', 0.06008171, ' ', 163.59248352, ' ', 12.31675053, ' ']
['20130531', '06:02:15', 864, ' ', 512.49700928, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 6.89289474, ' ', 7.24168873, ' ', 0.0, ' ', 0.04806537, ' ', 184.21052551, ' ', 12.16654682, ' ']
['20130531', '06:17:16', 879, ' ', 417.08724976, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 6.89738083, ' ', 7.24056721, ' ', 0.0, ' ', 0.04806537, ' ', 210.36610413, ' ', 12.18657398, ' ']
['20130531', '06:32:17', 104, ' ', 450.612854, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 6.89233398, ' ', 7.24112797, ' ', 0.0, ' ', 0.04806537, ' ', 238.79475403, ' ', 12.06641006, ' ']
['20130531', '06:47:18', 530, ' ', 539.05310059, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 6.88896894, ' ', 7.24112797, ' ', 0.0, ' ', 0.06008171, ' ', 249.29904175, ' ', 12.05639648, ' ']

Data sample from Notepad.

   ᢠ                    DATE       D                   TIME       C                   MILLITM    N                  MARKER     C                   N0         N                  STS_00     C                   N1         N    
STS_01 C N2 N STS_02 C N3 N STS_03 C N4 N STS_04 C N5 N
STS_05 C N6 N
STS_06 C N7 N
STS_07 C N8 N
STS_08 C N9 N
STS_09 C N10 N STS_10 C N11 N STS_11 C N12 N STS_12 C
2013053100:00:28977.00B548.84643555 0.00000000 83.11103058 0.00 0.00 0.00000000 0.00000000 6.93495178 7.26524115 0.00000000 231.49482727 16.30217171 12.48698235 2013053100:05:13577.00 571.55731201 0.00000000 81.27252960 0.00 0.00 0.00000000 0.00000000 6.93943787 7.26580191 0.00000000 230.02883911 16.31218529 12.94760895 2013053100:20:13701.00 547.88513184 0.00000000 82.43811798 0.00 0.00 0.00000000 0.00000000 6.93607330 7.27926016 0.00000000 230.24513245 16.20203400 11.91620636 2013053100:35:13885.00 613.19396973 0.00000000 81.49483490 0.00 0.00 0.00000000 0.00000000 6.92766190 7.29215765 0.00000000 230.04084778 16.14195251 11.29536152 2013053100:50:14 60.00 466.35424805 0.00000000 79.93270874 0.00 0.00 0.00000000 0.00000000 6.92541885 7.30281210 0.00000000 230.50949097 16.02178955 10.67451763

Best Answer

The only advice I can offer at the moment is to use my dbf module instead of writing your own.

If you can provide samples from two of the data files, plus an example of what they should look like in the final dbf file, I can offer more.

Here is your code, greatly simplified:

import dbf
import os
from glob import glob

if __name__ == '__main__':
    # get interesting dbf file names
    ROfiles = set(glob(r"C:\Users\jcm\Desktop\RO\*(Wide)*.dbf"))

    merged_dbf = None

    # Read a database
    for dbf_filename in ROfiles:
        with dbf.Table(dbf_filename) as table:

            # create merged dbf if it hasn't been
            if merged_dbf is None:
                merged_dbf = table.new('merged')

            with merged_dbf:
                # merge in records
                for record in table:
                    merged_dbf.append(record)

    # Create a new csv from merged_dbf
    dbf.export(merged_dbf, 'catwomen')

Regarding python - merging multiple DBF (csv) files into one, appending along columns, we found a similar question on Stack Overflow: https://stackoverflow.com/questions/17045571/
