python - 使用 Python 的单个连接上的多个请求

# NOTE(review): `requests` is not imported in this excerpt -- presumably
# `import requests` precedes it; confirm.
session = requests.Session()
# Query parameters sent with the GET below; 'auth_token' looks like a
# placeholder value -- confirm.
params = {'fd': 1, 'count': 1024, 'auth': 'auth_token'}
# NOTE(review): the URL is an empty string here (it appears to have been
# stripped from the listing); the real endpoint must be supplied.
r = session.get('', params=params)

# Presumably the raw response bytes obtained from the call above:
# b'\xb3_\\l\xe2\xbf/:\x07'
服务器 api 文档说:

You can push multiple requests over single connection without waiting for answer, to improve performance. The server will process the requests in the order they are received and you are guaranteed to receive answers in the same order. It is important however to send all requests with "Connection: keep-alive", otherwise the API server will close the connection without processing the pending requests.

他们说的是在单个线程中连续发送多个请求,而无需等待响应。我想这被称为 HTTP pipelining(HTTP 管道化)。
如何使用 Python Requests 库做到这一点?
一个类似问题的回答(answer)建议使用并行调用,但这不是我要问的。它还提到:“requests 会使用连接池,保持 TCP 连接打开”。我该如何实现这一点?
如果 requests 做不到,我可以使用其他同步库吗?


您可以在不使用线程的情况下并行获取多个页面。下面的代码通过重置 HTTPSConnection 的状态(这是一个私有变量!)来利用 HTTP 管道化,从而诱使它提前发送下一个请求。

from http.client import HTTPSConnection, _CS_IDLE
from urllib.parse import urlparse, urlunparse

def pipeline(host, pages, max_out_bound=4, debuglevel=0):
    """Fetch several pages from one host over a single pipelined HTTPS connection.

    Requests are written back-to-back without waiting for the previous
    response. ``http.client`` does not support this, so the connection's
    private state flag is reset to idle before every request.

    Args:
        host: Host name passed to ``HTTPSConnection`` and sent as ``Host``.
        pages: List of request paths. Falsy entries are never requested.
            The list is modified in place: a same-host redirect replaces
            the entry with the redirect target.
        max_out_bound: Maximum number of requests allowed to be awaiting a
            response at any time.
        debuglevel: When greater than 0, progress messages are printed.

    Returns:
        A list parallel to ``pages``. Each item is the response body
        (``bytes``) for a 2xx answer, ``(status, reason)`` for a failure or
        a redirect without a ``Location`` header,
        ``(status, reason, location)`` for a redirect to another host, or
        ``None`` for a page that was never requested.

    NOTE(review): every response object wraps the shared socket in its own
    reader, so read-ahead may consume bytes belonging to the next response
    -- confirm against the http.client implementation before relying on
    this in production.
    """
    page_count = len(pages)
    conn = HTTPSConnection(host)
    responses = [None] * page_count
    finished = [False] * page_count
    content = [None] * page_count
    headers = {'Host': host, 'Content-Length': 0, 'Connection': 'Keep-Alive'}
    # Indices whose requests are on the wire, oldest first. Answers arrive in
    # the order requests were sent, so they must be consumed in this order
    # (a re-sent redirect would otherwise be matched to the wrong response).
    in_flight = []

    try:
        while not all(finished):
            # Send: top the pipeline up to the allowed number of requests.
            for i, page in enumerate(pages):
                if len(in_flight) >= max_out_bound:
                    break
                if page and not finished[i] and responses[i] is None:
                    if debuglevel > 0:
                        print('Sending request for %r...' % (page,))
                    # Pretend the previous exchange is complete so that
                    # request() accepts another request right away.
                    conn._HTTPConnection__state = _CS_IDLE  # private variable!
                    conn.request("GET", page, None, headers)
                    responses[i] = conn.response_class(conn.sock, method=conn._method)
                    in_flight.append(i)
            if not in_flight:
                # Nothing is pending and nothing could be sent (only falsy
                # pages remain, or max_out_bound <= 0): avoid spinning forever.
                break

            # Read the oldest outstanding response.
            i = in_flight.pop(0)
            resp = responses[i]
            responses[i] = None
            if debuglevel > 0:
                print('Retrieving %r...' % (pages[i],))
            skip_read = False
            resp.begin()  # parse the status line and headers
            if debuglevel > 0:
                print(' %d %s' % (resp.status, resp.reason))
            if 200 <= resp.status < 300:
                # Ok
                content[i] = resp.read()
                cookie = resp.getheader('Set-Cookie')
                if cookie is not None:
                    headers['Cookie'] = cookie
                skip_read = True
                finished[i] = True
            elif 300 <= resp.status < 400:
                # Redirect
                loc = resp.getheader('Location')
                parsed = loc and urlparse(loc)
                if not parsed:
                    # Missing or empty location header
                    content[i] = (resp.status, resp.reason)
                    finished[i] = True
                elif parsed.netloc != '' and parsed.netloc != host:
                    # Redirect to another host
                    content[i] = (resp.status, resp.reason, loc)
                    finished[i] = True
                else:
                    # Same host: retry with the new path on a later pass. An
                    # empty path would be treated as "no page", so use '/'.
                    path = urlunparse(parsed._replace(scheme='', netloc='', fragment='')) or '/'
                    if debuglevel > 0:
                        print(' Updated %r to %r' % (pages[i], path))
                    pages[i] = path
            else:
                # Failed (>= 400) or any other status we cannot act on.
                content[i] = (resp.status, resp.reason)
                finished[i] = True

            if resp.will_close:
                # Connection (will be) closed, need to resend
                resp.close()
                conn.close()  # drops the socket; the next request() reconnects
                if debuglevel > 0:
                    print(' Connection closed')
                for j in in_flight:
                    if debuglevel > 0:
                        print(' Discarding out-bound request for %r' % (pages[j],))
                    responses[j] = None
                del in_flight[:]
            elif not skip_read:
                resp.read()  # read any data so the next response starts cleanly
    finally:
        conn.close()
    return content

if __name__ == '__main__':
    # Demo: fetch three pages over one pipelined connection.
    # NOTE(review): the host was an empty string in the original listing;
    # 'en.wikipedia.org' is inferred from the /wiki/ paths -- confirm.
    domain = 'en.wikipedia.org'
    pages = ['/wiki/HTTP_pipelining', '/wiki/HTTP', '/wiki/HTTP_persistent_connection']
    data = pipeline(domain, pages, max_out_bound=3, debuglevel=1)
    # pipeline() may rewrite entries of `pages` on redirect, so pair each
    # result with the final path only after the call returns.
    for path, body in zip(pages, data):
        print('==== Page %r ====' % (path,))
        # Bodies are bytes; failures are small tuples. Show a short preview.
        print(body[:512] if isinstance(body, bytes) else body)

