gpt4 book ai didi

python - 将三个 csv/xls 文件拉入数据帧会导致找不到其中一个

转载 作者:行者123 更新时间:2023-12-03 11:06:40 26 4
gpt4 key购买 nike

请原谅标题。这只是我遇到的一个非常奇怪的错误。当我尝试将两个 xls 文件和一个 csv 文件拉入数据帧时,出现此错误:

FileNotFoundError: [Errno 2] No such file or directory: '.\\pa-dirty-price-crawler\\goldman_folder\\Trade_Detail_GSCO_GREAT_AJAX_OPERATING_PARTNERSHIP_L.P._COB08_Apr_2020_19_1586111993954.xls'

当我这样做时出现错误:

nomura = get_nomura_df_by_date(start_date)
jpm = get_jpm_df_by_date(start_date)
gs = get_goldman_df_by_date(start_date)

现在,如果我注释掉 nomura 或 jpm,则完全没有错误。换句话说,如果我这样做

# nomura = get_nomura_df_by_date(start_date)
jpm = get_jpm_df_by_date(start_date)
gs = get_goldman_df_by_date(start_date)

然后我完全没有收到任何错误,这很奇怪。 xls 和 csv 文件都在我工作区的不同文件夹中,即我有一个文件夹用于 goldman,一个用于 nomura,一个用于 jpm。

完整代码如下:

import win32com.client
import os, zipfile
import pandas as pd
import pyodbc
import sql_utils as sql
import datetime as dt
import time
import xlrd

output_file = r"C:\Users\morgan.weiss\workspace\pa-dirty-price-crawler\output\All_Bond_Repos.xlsx"


def get_jpm_zip():
Outlook = win32com.client.Dispatch("Outlook.Application")
olNs = Outlook.GetNamespace("MAPI")
Inbox = olNs.GetDefaultFolder(6)

Filter = ("@SQL=" + chr(34) + "urn:schemas:httpmail:subject" +
chr(34) + " Like '%JPMS Statement%' AND " +
chr(34) + "urn:schemas:httpmail:hasattachment" +
chr(34) + "=1")

Items = Inbox.Items.Restrict(Filter)

dates = []
for a in Items:
dates.append(a.senton.date())

for Item in Items:
for attachment in Item.Attachments:
attachment.SaveAsFile(r"C:\\Users\\morgan.weiss\\workspace\\pa-dirty-price-crawler\\jpm_folder\\" + attachment.FileName)
return dates


def get_nomura_csv():
Outlook = win32com.client.Dispatch("Outlook.Application")
olNs = Outlook.GetNamespace("MAPI")
Inbox = olNs.GetDefaultFolder(6)

Filter = ("@SQL=" + chr(34) + "urn:schemas:httpmail:subject" +
chr(34) + " Like '%Nomura (NSI) Repo%' AND " +
chr(34) + "urn:schemas:httpmail:hasattachment" +
chr(34) + "=1")

Items = Inbox.Items.Restrict(Filter)

dates = []
for a in Items:
dates.append(a.senton.date())

dates_attachment_map = {}
i = 0
for Item in Items:
for attachment in Item.Attachments:
dates_attachment_map[dates[i]] = attachment.FileName
i+=1
attachment.SaveAsFile(r"C:\\Users\\morgan.weiss\\workspace\\pa-dirty-price-crawler\\nomura_folder\\" + attachment.FileName)
return dates_attachment_map

def get_goldman_csv():
Outlook = win32com.client.Dispatch("Outlook.Application")
olNs = Outlook.GetNamespace("MAPI")
Inbox = olNs.GetDefaultFolder(6)

Filter = ("@SQL=" + chr(34) + "urn:schemas:httpmail:subject" +
chr(34) + " Like '%Repo Margin Summary from GOLDMAN SACHS%' AND " +
chr(34) + "urn:schemas:httpmail:hasattachment" +
chr(34) + "=1")

Items = Inbox.Items.Restrict(Filter)

dates = []
for a in Items:
dates.append(a.senton.date())

dates_attachment_map = {}
i = 0
for Item in Items:
for attachment in Item.Attachments:
if 'Trade_Detail_GSCO_GREAT_AJAX' in attachment.FileName:
dates_attachment_map[dates[i]] = attachment.FileName
i+=1
attachment.SaveAsFile(r"C:\\Users\\morgan.weiss\\workspace\\pa-dirty-price-crawler\\goldman_folder\\" + attachment.FileName)
return dates_attachment_map

def unzip_jpm_files():
jpm = get_jpm_zip()
dir_name = r'C:\Users\morgan.weiss\workspace\pa-dirty-price-crawler\jpm_folder'
extension = ".zip"
os.chdir(dir_name) # change directory from working dir to dir with files


jpm_map = {}
i = 0

for item in os.listdir(dir_name): # loop through items in dir
if item.endswith(extension): # check for ".zip" extension
file_name = os.path.abspath(item) # get full path of files
zip_ref = zipfile.ZipFile(file_name) # create zipfile object
zip_ref.extractall(dir_name, pwd=b'qpsqpwsr') # extract file to dir
zip_ref.close() # close file
os.remove(file_name) # delete zipped file

for item in os.listdir(dir_name):
if item.endswith(".xls"):
file_name = os.path.abspath(item)
jpm_map[jpm[i]] = file_name
i+=1
return jpm_map

def get_jpm_data_frames(path_name):
wb = xlrd.open_workbook(path_name, logfile=open(os.devnull, 'w'))
df = pd.read_excel(wb,skiprows=4,engine='xlrd')
return df

def get_nomura_data_frames(file_name):
dir_name = '.\\pa-dirty-price-crawler\\nomura_folder'
path_name = os.path.join(dir_name, file_name)
df = pd.read_csv(path_name)
return df

def get_gs_data_frames(file_name):
dir_name = '.\\pa-dirty-price-crawler\\goldman_folder'
path_name = os.path.join(dir_name, file_name)
print(os.path.isfile(path_name))
df = pd.read_excel(path_name,skiprows=9, nrows=12)
return df

def get_data_from_sql():
cnxn = pyodbc.connect(sql.connection_string)
df = pd.read_sql(sql.get_bond_repos,con=cnxn)
return df

def compare_dates(mail_date, date_time):
return mail_date.year == date_time.year and mail_date.month == date_time.month and mail_date.day == date_time.day

def get_jpm_df_by_date(date):
jpm_map = unzip_jpm_files()
time.sleep(10)
jpm_df = pd.DataFrame()
jpm_df = get_jpm_data_frames(jpm_map[date])
return jpm_df

def get_nomura_df_by_date(date):
nomura_map = get_nomura_csv()
time.sleep(10)
nomura_df = pd.DataFrame()
nomura_df = get_nomura_data_frames(nomura_map[date])
return nomura_df

def get_goldman_df_by_date(date):
goldman_map = get_goldman_csv()
time.sleep(10)
goldman_df = pd.DataFrame()
goldman_df = get_gs_data_frames(goldman_map[date])
return goldman_df

def edit_isin(df):
df['ISIN'] = df['ISIN'].apply(lambda x: x[2:])
df['ISIN'] = df['ISIN'].apply(lambda x: x[:-1])
return df



x = '2020-04-09'
start_date = dt.datetime.strptime(x, "%Y-%m-%d")
start_date = start_date.date()

df = get_data_from_sql()
# nomura = get_nomura_df_by_date(start_date)
gs = get_goldman_df_by_date(start_date)
jpm = get_jpm_df_by_date(start_date)



# gs = edit_isin(gs)
# df = df.set_index('Cusip').join(gs.set_index('ISIN'))
# print(df.head())


# df.to_excel(output_file, index=False)

注意我去掉了我的名字,在帖子里写了“usr_name”。我对这个错误感到困惑,我不知道它为什么会崩溃。

编辑:

我开始确定问题所在。当我只是调用函数来获取 zip 文件夹时,代码运行正常,当我尝试获取所有数据帧时解压缩文件夹时会出现问题。也许这可能会添加一些你们可以指出的解决方案的细节。

最佳答案

我修复了我的代码,奇怪的修复只是按此顺序获取数据帧:

nomura = get_nomura_df_by_date(start_date)
gs = get_goldman_df_by_date(start_date)
jpm = get_jpm_df_by_date(start_date)

然后神奇地没有错误...为什么?我不知道...

关于python - 将三个 csv/xls 文件拉入数据帧会导致找不到其中一个,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/61257767/

26 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com