
Python script fails to read a csv file with an error - StopIteration


I am writing a script that downloads a large audit-log csv file from Azure DevOps and filters the data based on given criteria. This works for small csv files, but for files with a lot of data it fails with:

fields = next(reader)
StopIteration

Can someone help with the changes needed in the script? I am using Python 3.7.9 on macOS.

import argparse
import csv
import datetime
import json
import os
from itertools import zip_longest

import pandas as pd
import requests
from requests.auth import HTTPBasicAuth


def getproject(url, pat):
    response = requests.get(url, auth=HTTPBasicAuth(username='', password=pat))

    if response.status_code == 200:
        url_data = response.content
        tempfile = open("temp.csv", "wb")
        tempfile.write(url_data)
        tempfile.close()
        return url_data
    else:
        print("\nERROR : Unable to connect to the server...")


def FilterData():
    lists = []
    pro_name = []
    RepoId = []
    RepoName = []

    new_file = open("temp_new.csv", 'w', newline='')
    writer = csv.writer(new_file)
    with open("temp.csv", 'r') as readFile:
        reader = csv.reader(readFile)
        fields = next(reader)
        lists.append(fields)
        for row in reader:
            for field in row:
                if field == "Git.RepositoryCreated":
                    lists.append(row)
        writer.writerows(lists)
    new_file.close()
    os.remove("temp.csv")

    timestamp = datetime.datetime.now()
    timestamp = timestamp.strftime("%d%B%Y_%H%M%S")
    file_name = "Data2_" + str(timestamp) + ".csv"

    file1 = open("temp_new.csv", 'r')
    df = pd.read_csv(file1)
    for i in df["Data"]:
        res = json.loads(i)
        pro_name.append(res['ProjectName'])
        RepoId.append(res['RepoId'])
        RepoName.append(res['RepoName'])
    Disp_Name = df["ActorDisplayName"]
    ActionId = df["ActionId"]
    TimeStamp = df["Timestamp"]
    file1.close()
    os.remove("temp_new.csv")

    Header = ["Actor Display Name", "Project Name", "RepoName", "RepoId", "ActionId", "Timestamp"]
    d = [Disp_Name, pro_name, RepoName, RepoId, ActionId, TimeStamp]
    export_data = zip_longest(*d, fillvalue='')
    with open(file_name, 'w', newline='') as myfile:
        wr = csv.writer(myfile)
        wr.writerow(Header)
        wr.writerows(export_data)


if __name__ == '__main__':
    parser = argparse.ArgumentParser("This is used for getting list of the projects")
    parser.add_argument("-o", dest="org", help="org name")
    parser.add_argument("-p", dest="pat", help="pat value")
    parser.add_argument("-sd", dest="sdate", help="Start Date")
    parser.add_argument("-ed", dest="edate", help="End Date")

    args = parser.parse_args()
    org = args.org
    token = args.pat
    startdate = args.sdate
    enddate = args.edate

    url = ("https://auditservice.dev.azure.com/{org_name}/_apis/audit/downloadlog?"
           "format=csv&startTime={startdt}&endTime={enddt}&api-version=6.1-preview.1"
           ).format(org_name=org, startdt=startdate, enddt=enddate)

    # call "getproject" function to check url and token to further create required csv
    getproject(url, token)

    FilterData()
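
For reference, with the argparse flags defined above, the script would be invoked along these lines (the script name, org, PAT, and date values are placeholders):

python audit_filter.py -o myorg -p xxxxpatxxxx -sd 2021-01-01T00:00:00Z -ed 2021-01-31T00:00:00Z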

Best Answer

[+] In your getproject function, you should use a try/except block to handle http errors etc.
[+] If the csv file you are trying to download is very large, it is better to write the data in chunks.
As for the fields = next(reader) StopIteration error,
I don't know. ¯\_(ツ)_/¯
Try putting your code into a debugger and stepping through it.
See: download large file in python with requests

import requests
from requests.auth import HTTPBasicAuth

def getproject(url, pat):
    try:
        # NOTE the stream=True parameter below
        with requests.get(url, auth=HTTPBasicAuth(username='', password=pat), stream=True) as r:
            r.raise_for_status()
            with open('tmp.csv', 'wb') as f:
                for chunk in r.iter_content(chunk_size=8192):
                    # If you have a chunk-encoded response, uncomment the if
                    # and set the chunk_size parameter to None.
                    # if chunk:
                    f.write(chunk)

    except requests.exceptions.ConnectionError as c_error:
        print(f"[-] Connection Error: {c_error}")
    except requests.exceptions.Timeout as t_error:
        print(f"[-] Connection Timeout Error: {t_error}")
    except requests.exceptions.RequestException as req_error:
        print(f"[-] Some Ambiguous Exception: {req_error}")


# This way seems faster based upon the comments of the link I shared
import requests
import shutil

def download_file(url):
    local_filename = url.split('/')[-1]
    with requests.get(url, stream=True) as r:
        with open(local_filename, 'wb') as f:
            shutil.copyfileobj(r.raw, f)

    return local_filename
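
For the audit-log download specifically, this pattern would also need the PAT-based basic auth from the question; a sketch under that assumption (download_audit_log is a hypothetical helper combining the two):

import shutil
import requests
from requests.auth import HTTPBasicAuth

def download_audit_log(url, pat, local_filename="temp.csv"):
    # stream=True keeps the whole response body out of memory
    with requests.get(url, auth=HTTPBasicAuth('', pat), stream=True) as r:
        r.raise_for_status()
        # r.raw bypasses requests' content decoding, so ask urllib3
        # to decompress gzip/deflate responses itself
        r.raw.decode_content = True
        with open(local_filename, 'wb') as f:
            shutil.copyfileobj(r.raw, f)
    return local_filename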

Regarding the Python script failing to read a csv file with a StopIteration error, there is a similar question on Stack Overflow: https://stackoverflow.com/questions/64492339/
