
error-handling - HTTP Error 403: Forbidden: Access is denied


I have been running the script below for a few months, but today, when I ran it again, I got the following error:

urllib.error.HTTPError: HTTP Error 403: Forbidden: Access is denied.

I tried adding a delay with sleep, but that doesn't seem to help. The denial appears to come from Finviz.com, which is where I run the specified screen and scrape the tickers. Does anyone have any suggestions as to why access is suddenly being denied?
from datetime import datetime
import lxml
from lxml import html
import requests
import numpy as np
import pandas as pd
from urllib.request import urlopen as ur
from bs4 import BeautifulSoup as soup
from time import sleep


#used for removing parts of the url collection if an error occurs
print('Running Screen...')
collection = ['ar=180','r=21&ar=180','r=41&ar=180','r=61&ar=180','r=81&ar=180','r=101&ar=180','r=121&ar=180','r=141&ar=180','r=161&ar=180','r=181&ar=180','r=201&ar=180','r=221&ar=180','r=241&ar=180']
for url in collection: #scrape multiple pages
    my_url = 'https://finviz.com/screener.ashx?v=141&f=cap_smallover,fa_eps5years_pos,fa_grossmargin_o10,fa_netmargin_pos,fa_opermargin_pos,fa_sales5years_o5,geo_usa,sh_avgvol_o1000,ta_beta_o0.5&ft=2&' + str(url)
    sleep(15)
    uClient = ur(my_url)
    page_html = uClient.read()
    uClient.close()
    page_soup = soup(page_html, "html.parser") #find ticker
    stock_tickers = page_soup.findAll("a", {"class":"screener-link-primary"})
    for tickers in stock_tickers: #find all of the tickers
        ticker = tickers.text
        collection = [ticker]
        for url in collection: #scrape multiple pages
            #balance sheet data
            my_url1 = 'https://finance.yahoo.com/quote/' + str(url) + '/balance-sheet?p=' + str(url)
            headers = {
                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
                'Accept-Encoding': 'gzip, deflate, br',
                'Accept-Language': 'en-US,en;q=0.9',
                'Cache-Control': 'max-age=0',
                'Pragma': 'no-cache',
                'Referrer': 'https://google.com',
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36'
            }
            page = requests.get(my_url1, headers)
            tree = html.fromstring(page.content)
            tree.xpath("//h1/text()")
            table_rows = tree.xpath("//div[contains(@class, 'D(tbr)')]")
            #assert len(table_rows) > 0
            parsed_rows = []
            for table_row in table_rows:
                parsed_row = []
                el = table_row.xpath("./div")
                none_count = 0
                for rs in el:
                    try:
                        (text,) = rs.xpath('.//span/text()[1]')
                        parsed_row.append(text)
                    except ValueError:
                        parsed_row.append(np.NaN)
                        none_count += 1
                if (none_count < 4):
                    parsed_rows.append(parsed_row)
            df_balancesheet = pd.DataFrame(parsed_rows)
            df_balancesheet = df_balancesheet.set_index(0)
            df_balancesheet = df_balancesheet.transpose()
            cols = list(df_balancesheet.columns)
            cols[0] = 'Date'
            df_balancesheet = df_balancesheet.set_axis(cols, axis='columns', inplace=False)
            numeric_columns = list(df_balancesheet.columns)[1::]
            #income statement data
            my_url2 = 'https://finance.yahoo.com/quote/' + str(url) + '/financials?p=' + str(url)
            headers = {
                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
                'Accept-Encoding': 'gzip, deflate, br',
                'Accept-Language': 'en-US,en;q=0.9',
                'Cache-Control': 'max-age=0',
                'Pragma': 'no-cache',
                'Referrer': 'https://google.com',
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36'
            }
            page = requests.get(my_url2, headers)
            tree = html.fromstring(page.content)
            tree.xpath("//h1/text()")
            table_rows = tree.xpath("//div[contains(@class, 'D(tbr)')]")
            #assert len(table_rows) > 0
            parsed_rows = []
            for table_row in table_rows:
                parsed_row = []
                el = table_row.xpath("./div")
                none_count = 0
                for rs in el:
                    try:
                        (text,) = rs.xpath('.//span/text()[1]')
                        parsed_row.append(text)
                    except ValueError:
                        parsed_row.append(np.NaN)
                        none_count += 1
                if (none_count < 4):
                    parsed_rows.append(parsed_row)
            df_incomestatement = pd.DataFrame(parsed_rows)
            df_incomestatement = df_incomestatement.set_index(0)
            df_incomestatement = df_incomestatement.transpose()
            cols = list(df_incomestatement.columns)
            cols[0] = 'Date'
            df_incomestatement = df_incomestatement.set_axis(cols, axis='columns', inplace=False)
            numeric_columns = list(df_incomestatement.columns)[1::]
            # cash flow data
            my_url3 = 'https://finance.yahoo.com/quote/' + str(url) + '/cash-flow?p=' + str(url)
            headers = {
                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
                'Accept-Encoding': 'gzip, deflate, br',
                'Accept-Language': 'en-US,en;q=0.9',
                'Cache-Control': 'max-age=0',
                'Pragma': 'no-cache',
                'Referrer': 'https://google.com',
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36'
            }
            page = requests.get(my_url3, headers)
            tree = html.fromstring(page.content)
            tree.xpath("//h1/text()")
            table_rows = tree.xpath("//div[contains(@class, 'D(tbr)')]")
            #assert len(table_rows) > 0
            parsed_rows = []
            for table_row in table_rows:
                parsed_row = []
                el = table_row.xpath("./div")
                none_count = 0
                for rs in el:
                    try:
                        (text,) = rs.xpath('.//span/text()[1]')
                        parsed_row.append(text)
                    except ValueError:
                        parsed_row.append(np.NaN)
                        none_count += 1
                if (none_count < 4):
                    parsed_rows.append(parsed_row)
            df_cashflow = pd.DataFrame(parsed_rows)
            df_cashflow = df_cashflow.set_index(0)
            df_cashflow = df_cashflow.transpose()
            cols = list(df_cashflow.columns)
            cols[0] = 'Date'
            df_cashflow = df_cashflow.set_axis(cols, axis='columns', inplace=False)
            numeric_columns = list(df_cashflow.columns)[1::]
            writer = pd.ExcelWriter("Financial statements/"+ ticker + '.xlsx')
            df_incomestatement.to_excel(writer,'Income Statement')
            df_balancesheet.to_excel(writer,'Balance Sheet')
            df_cashflow.to_excel(writer,'Statement of Cash Flows')
            writer.save()
            print('Collecting data for ' + ticker + '...')

Best Answer

Nvm, fixed it. I just wasn't sending a User-Agent header.
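
For anyone else who hits this, a minimal sketch of that fix, assuming the 403 comes from Finviz rejecting the default Python-urllib User-Agent. Instead of calling urlopen on the bare URL, wrap it in a Request that carries an explicit User-Agent header (the UA string below is just an example browser string, not anything Finviz specifically requires):

from urllib.request import Request, urlopen

# urlopen sends 'Python-urllib/3.x' as the User-Agent by default;
# wrapping the URL in a Request lets us override that header.
req = Request(my_url, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'})
uClient = urlopen(req)
page_html = uClient.read()
uClient.close()

A separate gotcha in the question's code: requests.get(my_url1, headers) passes the headers dict as the second positional argument, which requests interprets as params (query-string parameters), so those carefully built headers are never actually sent to Yahoo either. It needs to be requests.get(my_url1, headers=headers).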

On error-handling - HTTP Error 403: Forbidden: Access is denied, we found a similar question on Stack Overflow: https://stackoverflow.com/questions/61754749/
