gpt4 book ai didi

python - 请求为什么在获取安全链接时失败

转载 作者:可可西里 更新时间:2023-11-01 17:26:59 26 4
gpt4 key购买 nike

当我尝试发出安全请求时,出现了一个奇怪的错误,而且我找不到错误。我确定这是愚蠢的事情。

#!/usr/bin/env python


'''
this module was designed with web scrapers and web crawlers in mind.
I find my self writing these functions all the time. I Wrote this model
to save time.
'''

import requests
import urlparse
import urllib2
import urllib
import re
import os
import json
from fake_useragent import UserAgent

class InvalidURL(Exception):
pass

class URL(object):
'''Common routines for dealing with URLS.
'''
def __init__(self, url):
'''Setup the initial state
'''
self.raw_url = url
self.url = urlparse.urlparse(url)
self.scheme = self.url.scheme
self.domain = self.url.netloc
self.path = self.url.path
self.params = self.url.params
self.query = self.url.query
self.fragment = self.url.fragment


def __str__(self):
''' This os called when somthing
asks for a string representation of the
url
'''
return self.raw_url


def valid(self):
"""Validate the url.

returns True if url is valid
and False if it is not
"""
regex = re.compile(
r'^(?:http|ftp)s?://' # http:// or https://
r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|'
r'localhost|' #localhost...
r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})' # ...or ip
r'(?::\d+)?' # optional port
r'(?:/?|[/?]\S+)$', re.IGNORECASE)
match = regex.match(self.raw_url)
if match:
return True


def unquote(self):
"""unquote('abc%20def') -> 'abc def'."""

return urllib2.unquote(self.raw_url)


def quote(self):
"""quote('abc def') -> 'abc%20def'

Each part of a URL, e.g. the path info, the query, etc., has a
different set of reserved characters that must be quoted.

RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
the following reserved characters.

reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
"$" | ","

Each of these characters is reserved in some component of a URL,
but not necessarily in all of them.

By default, the quote function is intended for quoting the path
section of a URL. Thus, it will not encode '/'. This character
is reserved, but in typical usage the quote function is being
called on a path where the existing slash characters are used as
reserved characters.
"""
return urllib2.quote(self.raw_url)


def parameters(self):
"""
parse the parameters of the url
and return them as a dict.
"""
return urlparse.parse_qs(self.params)


def secure(self):
""" Checks if the url uses ssl. """
if self.scheme == 'https':
return True


def extention(self):
""" return the file extention """
return os.path.splitext(self.path)[1]


def absolute(self):
""" Checks if the URL is absolute. """
return bool(self.domain)


def relitive(self):
""" Checks if the url is relitive. """
return bool(self.scheme) is False


def encode(self, mapping):
"""Encode a sequence of two-element tuples or dictionary into a URL query string.

If any values in the query arg are sequences and doseq is true, each
sequence element is converted to a separate parameter.

If the query arg is a sequence of two-element tuples, the order of the
parameters in the output will match the order of parameters in the
input.
"""
query = urllib.urlencode(mapping)
return urlparse.urljoin(self.raw_url, query)


class Request(object):


allow_redirects = True
timeout = 5
ramdom_useragent = 0
verify = False
session = requests.Session()
stream = True
proxies = {}

def __init__(self, url):
""" Set the inital state """
self.agentHeaders = {}
self.url = URL(url)
if not self.url.valid():
raise InvalidURL("{} is invalid".format(url))

def stream(self, answer):
self.stream = bool(answer)

def randomUserAgent(self):
""" Set a random User-Agent """
self.setUserAgent(UserAgent().random)


def allowRedirects(self, answer):
""" Choose whether or not to follow redirects."""
self.allow_redirects = bool(answer)


def setUserAgent(self, agent):
""" Set the User-Agent """
self.setHeaders('User-Agent', agent)


def setHeaders(self, key, value):
""" Set custom headers """
self.agentHeaders[key] = value


def verify(self, answer):
""" Set whether or not to verify SSL certs"""
self.verify = bool(answer)


def get(self):
"""Sends a GET request"""
return self.session.get(
url=self.url,
headers=self.agentHeaders,
allow_redirects=self.allow_redirects,
timeout=self.timeout,
verify=self.verify,
stream=self.stream,
proxies=self.proxies
)


def head(self):
""" Send a head request and return the headers """
return self.session.head(
self.url,
headers=self.agentHeaders,
allow_redirects=self.allow_redirects,
timeout=self.timeout,
verify=self.verify,
proxies=self.proxies
).headers


def options(self):
""" Send a options request and return the options """
return self.session.options(
self.url,
headers=self.agentHeaders,
allow_redirects=self.allow_redirects,
timeout=self.timeout,
verify=self.verify,
proxies=self.proxies
).headers['allow']


def json(self):
"""
Deserialize json data (a ``str`` or ``unicode`` instance
containing a JSON document) to a Python object.
"""
return json.loads(self.text)


def headerValue(self, value):
""" Get a value from the headers. """
return self.headers().get(value)



request = Request('https://www.google.com')
req = request.get()
print req.text
print request.head()
print
print req.headers.get('link')
print request.options()

request = Request('https://www.google.com')
req = request.get()

Sat Jul 29 HttpClient python UserAgent.py
Traceback (most recent call last):
File "UserAgent.py", line 234, in <module>
req = request.get()
File "UserAgent.py", line 192, in get
proxies=self.proxies
File "/home/ricky/.local/lib/python2.7/site-packages/requests/sessions.py", line 515, in get
return self.request('GET', url, **kwargs)
File "/home/ricky/.local/lib/python2.7/site-packages/requests/sessions.py", line 502, in request
resp = self.send(prep, **send_kwargs)
File "/home/ricky/.local/lib/python2.7/site-packages/requests/sessions.py", line 612, in send
r = adapter.send(request, **kwargs)
File "/home/ricky/.local/lib/python2.7/site-packages/requests/adapters.py", line 407, in send
self.cert_verify(conn, request.url, verify, cert)
File "/home/ricky/.local/lib/python2.7/site-packages/requests/adapters.py", line 224, in cert_verify
if not cert_loc or not os.path.exists(cert_loc):
File "/usr/lib/python2.7/genericpath.py", line 26, in exists
os.stat(path)
TypeError: coercing to Unicode: need string or buffer, instancemethod found

最佳答案

看看你的Request.verify方法:

def verify(self, answer):
""" Set whether or not to verify SSL certs"""
self.verify = bool(answer)

它与 Request.verify 冲突属性。

因此,当您调用 Request.get() ,你正在传递你的 verify verify 的实例方法requests.session.get(..., verify=<your method>) 中的参数, 而不是字符串( should point to a certificate bundle )或 bool .

线索在您的堆栈跟踪中:TypeError: coercing to Unicode: need string or buffer, instancemethod found .

解决方案:重命名您的 verify类似于 setVerify 的方法(与其他方法保持一致)。

与此问题无关,我建议您实现 Request通过扩展 requests.Session 类类(class)。这样你就可以定义更少的方法(比如 getheadjson 等)

关于python - 请求为什么在获取安全链接时失败,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/45392983/

26 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com