gpt4 book ai didi

python:urllib.request.urlopen 不工作

转载 作者:行者123 更新时间:2023-12-04 08:40:20 25 4
gpt4 key购买 nike

我在使用 urllib 发送请求时遇到了问题,它抛出了下面这个错误:

HTTPError: HTTP Error 500: Internal Server Error
我使用的代码是这样的:
from urllib.request import urlopen
from bs4 import BeautifulSoup
import ssl
# Ignore SSL certificate errors: build a default context, then turn off
# hostname checking and certificate verification.
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE

url='https://seia.sea.gob.cl/documentos/documento.php?idDocumento=2148717266'
# NOTE(review): only `urlopen` is imported above, so the `urllib` name used
# here is not bound by this snippet -- presumably it was imported earlier in
# the session. The request is sent with no custom headers.
html = urllib.request.urlopen(url, context=ctx).read()
soup = BeautifulSoup(html, 'html.parser')
整个错误日志是这样的:
    HTTPError                                 Traceback (most recent call last)
<ipython-input-9-3be7085b64e6> in <module>
5
6 url='https://seia.sea.gob.cl/documentos/documento.php?idDocumento=2148717266'
----> 7 html = urllib.request.urlopen(url, context=ctx).read()
8 soup = BeautifulSoup(html, 'html.parser')

~\anaconda3\lib\urllib\request.py in urlopen(url, data, timeout, cafile, capath, cadefault, context)
220 else:
221 opener = _opener
--> 222 return opener.open(url, data, timeout)
223
224 def install_opener(opener):

~\anaconda3\lib\urllib\request.py in open(self, fullurl, data, timeout)
529 for processor in self.process_response.get(protocol, []):
530 meth = getattr(processor, meth_name)
--> 531 response = meth(req, response)
532
533 return response

~\anaconda3\lib\urllib\request.py in http_response(self, request, response)
638 # request was successfully received, understood, and accepted.
639 if not (200 <= code < 300):
--> 640 response = self.parent.error(
641 'http', request, response, code, msg, hdrs)
642

~\anaconda3\lib\urllib\request.py in error(self, proto, *args)
567 if http_err:
568 args = (dict, 'default', 'http_error_default') + orig_args
--> 569 return self._call_chain(*args)
570
571 # XXX probably also want an abstract factory that knows when it makes

~\anaconda3\lib\urllib\request.py in _call_chain(self, chain, kind, meth_name, *args)
500 for handler in handlers:
501 func = getattr(handler, meth_name)
--> 502 result = func(*args)
503 if result is not None:
504 return result

~\anaconda3\lib\urllib\request.py in http_error_default(self, req, fp, code, msg, hdrs)
647 class HTTPDefaultErrorHandler(BaseHandler):
648 def http_error_default(self, req, fp, code, msg, hdrs):
--> 649 raise HTTPError(req.full_url, code, msg, hdrs, fp)
650
651 class HTTPRedirectHandler(BaseHandler):

HTTPError: HTTP Error 500: Internal Server Error
非常感谢任何帮助!
提前致谢

最佳答案

尝试在请求中设置 User-Agent header:

from bs4 import BeautifulSoup
from urllib.request import urlopen, Request

# Send a browser-like User-Agent; the same request without this header
# was answered with HTTP 500.
req = Request(
    'https://seia.sea.gob.cl/documentos/documento.php?idDocumento=2148717266',
    headers={
        'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:82.0) Gecko/20100101 Firefox/82.0',
    },
)

# Use the response as a context manager so the connection is closed even
# if reading the body raises.
with urlopen(req) as response:
    content = response.read()

soup = BeautifulSoup(content, 'html.parser')

print(soup.prettify())
打印:
<html>
<head>
<title>
Documento - 37/8e/3a04fd80aee93aeeb325f0ff8fa06e0a4634
</title>

...and so on.

关于python:urllib.request.urlopen 不工作,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/64597172/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com