gpt4 book ai didi

python - 将DataFrame列类型从字符串转换为日期

转载 作者:行者123 更新时间:2023-12-03 08:44:01 29 4
gpt4 key购买 nike

我正在尝试将某些列转换为日期格式,但一直没有成功。这是我的代码:

from datetime import date, datetime
from utils import misc_errors
from os import listdir, remove
from os.path import isfile, join
from pathlib import Path
import pandas as pd
import shutil
import csv
import io
import codecs

# NOTE(review): indentation was lost when this script was pasted, so the nesting
# of the loops and of the `if` below cannot be recovered with certainty.

# getting paths that will be used later
path = str(Path().absolute()) + r'\\'
files = []
# NOTE(review): the next two literals are not valid Python as written (a raw
# string cannot end in a single backslash) - presumably the real source and
# destination directories were redacted before posting.
fetch = r'C:\' +'\\'
net = r'C:\' +'\\'

# getting the names of the files needed to copy
allfiles = [f for f in listdir(fetch) if isfile(join(fetch, f))]
# keep only the names ending in 'csv'
for name in allfiles:
if name.endswith('csv'):
files.append(name)
for file_name in files:

#copy the file
shutil.copy2(fetch + file_name, path + file_name)

#get the date for later
file_date = date.today().strftime("%Y%m%d")

# Reading the data from the csv file
#file_df = pd.read_csv(file_name, sep=',', quotechar='"', thousands=',', encoding='Latin-1')
# dtype='object' disables type inference; skiprows=5 skips the first five lines of the file
file_df = pd.read_csv(file_name, sep=',',delimiter=',', quotechar='"', thousands=',', encoding='Latin-1', dtype='object', low_memory=False, skiprows=5)
# strip surrounding whitespace from the column names
file_df.columns = [col.strip() for col in file_df.columns]

#populate the count column
# every row gets the string '1' in a new "count()" column
total = len(file_df.index)
count = []
for i in range(0, total):
count.append('1')
file_df["count()"] = count

# get a list of the headers for use later
headers = file_df.columns.values.tolist()

# missing values become empty strings; the formatting code below tests for ''
file_df.fillna('',inplace=True)

if 'project' in file_name:
# remove all duplicates from the projects file
file_df = file_df.drop_duplicates(keep='first')

# output file name: fixed prefix plus today's date as YYYYMMDD
file_final = "PROJECTS.FULL." + file_date
# NOTE(review): `total` was measured before drop_duplicates, so this list may be
# longer than file_df if any duplicate rows were removed - confirm.
supplier = []
for i in range(0, total):
supplier.append('Unclassified')

file_df["Suppliers - ERP Supplier ID"] = supplier
file_df["Suppliers - ERP Supplier"] = supplier

# convert every cell to str and remove newline characters, row by row
# NOTE(review): `headers` was captured before the two supplier columns were
# assigned; if those columns are new, the row length will not match index=headers - confirm.
file_df = file_df.apply(lambda x: pd.Series([str(x[i]).replace("\n",'') for i in range(0, len(x))], index=headers), axis=1)

# format the amount columns with thousands separators and two decimals; '' stays ''
# NOTE(review): cells are strings after the step above, so the `== 0` branch
# looks unreachable - confirm.
num_headers = [r"sum(Annual Spend Amount)", r"sum(Total Contract Value Amount)"]
for header in num_headers:
file_df[header] = ['{0:.0f}'.format(float(file_df[header][i])) if file_df[header][i] == 0 else '{0:,.2f}'.format(float(file_df[header][i])) if file_df[header][i] != '' else '' for i in range(0,len(file_df[header]))]

# truncate the percentage column to int; '' stays ''
header = r"sum(% of Total Contract Value in US)"
file_df[header] = [int(float(file_df[header][i])) if file_df[header][i] != '' else '' for i in range(0, len(file_df[header]))]

# same int conversion for the reporting year
header = "Reporting Year"
file_df[header] = [int(float(file_df[header][i])) if file_df[header][i] != '' else '' for i in range(0, len(file_df[header]))]

# misc_errors comes from the project's utils module; what it does to each
# free-text column is not visible here
word_headers = ["Description", "Key Considerations", "Key Highlights / Value Statement", "Status Update"]
for header in word_headers:
file_df = misc_errors(file_df, header)

# replace en dashes in the column names with plain hyphens
file_df.columns = [c.replace("–", "-") for c in file_df]

# names of the columns that should be converted to dates
file_headers = ["Begin Date","End Date - Date","Estimated Completion Date - Date",
"Anticipated T&O Legal Engagement Date - Date","Benefits Start Date - Date", "Benefits End Date - Date" ]

# NOTE(review): this is the line that raises "TypeError: list indices must be
# integers or slices, not str" - file_headers is a list of names, not the
# DataFrame. The result is also never assigned. Presumably the intent was
# file_df[col] = pd.to_datetime(file_df[col], ...) for each col in file_headers.
pd.to_datetime(file_headers['Begin Date'], errors='ignore')

# write the result as a Latin-1 encoded CSV without the index column
file_df.to_csv(file_final, index=False, encoding="latin-1")

# delete the file named file_name (resolved against the current working directory)
remove(file_name)

# copy the generated file to the `net` destination
shutil.copy2(path + file_final, net + file_final)

我正在尝试将 file_headers(靠近底部)中列出的列转换为日期。

在此处更新并添加了完整的错误日志:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-3-f04094ba593a> in <module>
79 "Anticipated T&O Legal Engagement Date - Date","Benefits Start Date - Date", "Benefits End Date - Date" ]
80
---> 81 pd.to_datetime(file_headers['Begin Date'], errors='ignore')
82
83 file_df.to_csv(file_final, index=False, encoding="latin-1")

TypeError: list indices must be integers or slices, not str

仍然不起作用。感谢大家到目前为止提供的所有帮助;如果还需要我提供其他信息,请告诉我。

最佳答案

您可能要检查Date列的内容。尝试遍历它并找出无效的值:

# Walk the DataFrame row by row and print every Date value that pandas
# cannot parse, so the invalid entries can be inspected.
for row in df.itertuples():
    try:
        pd.to_datetime(row.Date)
    except (ValueError, TypeError, OverflowError):
        # Catch only parsing failures; a bare `except:` would also swallow
        # KeyboardInterrupt and hide unrelated bugs.
        print(row.Date)

然后再决定如何处理这些无效值。

关于python - 将DataFrame列类型从字符串转换为日期,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/58959002/

29 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com