gpt4 book ai didi

python - 使用 asyncio 的 Python 爬虫无法正常运行

转载 作者:太空宇宙 更新时间:2023-11-03 13:38:58 25 4
gpt4 key购买 nike

import asyncio
import aiohttp
import bs4
import tqdm


@asyncio.coroutine
def get(*args, **kwargs):
    """Issue an HTTP GET through aiohttp and return the response body.

    All positional and keyword arguments are forwarded unchanged to
    ``aiohttp.request``.
    """
    response = yield from aiohttp.request('GET', *args, **kwargs)
    # NOTE: this is the line that raises the AttributeError shown in the
    # traceback below: the installed aiohttp's ClientResponse has no
    # ``read_and_close`` attribute.
    return (yield from response.read_and_close(decode=True))


@asyncio.coroutine
def wait_with_progress(coros):
    """Wait for every awaitable in *coros* while showing a tqdm progress bar.

    ``coros`` must support ``len()``: its length is passed to tqdm as the
    bar's total. The individual results are awaited and then discarded.
    """
    # as_completed yields futures in completion order, so the bar advances
    # each time any one of the coroutines finishes.
    for f in tqdm.tqdm(asyncio.as_completed(coros), total=len(coros)):
        yield from f


def first_magnet(page):
    """Return the ``href`` of the first magnet-download anchor in *page*.

    *page* is HTML markup. The anchor is located by its exact ``title``
    attribute, ``'Download this torrent using magnet'``.

    Raises:
        TypeError: if no such anchor exists (``find`` returns ``None``,
            which cannot be subscripted).
    """
    # Name the parser explicitly so the result does not depend on which
    # optional parser libraries happen to be installed, and so bs4 does not
    # emit its "no parser was explicitly specified" warning.
    soup = bs4.BeautifulSoup(page, 'html.parser')
    a = soup.find('a', title='Download this torrent using magnet')
    return a['href']


@asyncio.coroutine
def print_magnet(query):
    """Search for *query*, then print ``"<query>: <magnet link>"``.

    Builds the search URL from *query*, downloads the result page via
    ``get`` while holding the module-level semaphore ``sem``, extracts the
    first magnet link with ``first_magnet`` and prints it.
    """
    url = 'http://thepiratebay.se/search/{}/0/7/0'.format(query)
    # Hold the semaphore only for the download itself; parsing and printing
    # do not need to count against the concurrency limit.
    with (yield from sem):
        page = yield from get(url, compress=True)
    magnet = first_magnet(page)
    print('{}: {}'.format(query, magnet))

# Search terms; one print_magnet coroutine is created for each entry.
distros = ['archlinux', 'ubuntu', 'debian']
# Shared semaphore that print_magnet acquires around each download,
# allowing at most 5 holders at a time.
sem = asyncio.Semaphore(5)
loop = asyncio.get_event_loop()
# Bundle the per-distro coroutines into a single awaitable and run the
# event loop until it completes.
f = asyncio.wait([print_magnet(d) for d in distros])
loop.run_until_complete(f)

没有返回任何内容。错误如下:

C:\Python34\python.exe C:/Users/Marco/PycharmProjects/untitled3/crawler.py
Unclosed connection
client_connection: Connection<('thepiratebay.se', 443, True)>
Unclosed response
client_response: <ClientResponse(https://thepiratebay.se/search/archlinux/0/7/0) [200 OK]>
<CIMultiDictProxy('SERVER': 'cloudflare-nginx', 'DATE': 'Sun, 24 Jan 2016 03:17:30 GMT', 'CONTENT-TYPE': 'text/html; charset=UTF-8', 'TRANSFER-ENCODING': 'chunked', 'CONNECTION': 'keep-alive', 'SET-COOKIE': 'PHPSESSID=72fd62ba4a13c716576868e13d00a3ae; path=/; domain=.thepiratebay.se', 'EXPIRES': 'Thu, 19 Nov 1981 08:52:00 GMT', 'CACHE-CONTROL': 'private, max-age=10800, pre-check=10800', 'LAST-MODIFIED': 'Sun, 15 Mar 2015 05:20:08 GMT', 'SET-COOKIE': 'language=en_EN; expires=Mon, 23-Jan-2017 03:17:32 GMT; path=/; domain=.thepiratebay.se', 'VARY': 'Accept-Encoding', 'CF-RAY': '269895ee698e32dd-HKG', 'CONTENT-ENCODING': 'gzip')>

Unclosed connection
client_connection: Connection<('thepiratebay.se', 443, True)>
Unclosed response
client_response: <ClientResponse(https://thepiratebay.se/search/debian/0/7/0) [200 OK]>
<CIMultiDictProxy('SERVER': 'cloudflare-nginx', 'DATE': 'Sun, 24 Jan 2016 03:17:30 GMT', 'CONTENT-TYPE': 'text/html; charset=UTF-8', 'TRANSFER-ENCODING': 'chunked', 'CONNECTION': 'keep-alive', 'SET-COOKIE': 'PHPSESSID=52751957860238a12a8bff265f19a3b8; path=/; domain=.thepiratebay.se', 'EXPIRES': 'Thu, 19 Nov 1981 08:52:00 GMT', 'CACHE-CONTROL': 'private, max-age=10800, pre-check=10800', 'LAST-MODIFIED': 'Sun, 15 Mar 2015 05:20:08 GMT', 'SET-COOKIE': 'language=en_EN; expires=Mon, 23-Jan-2017 03:17:31 GMT; path=/; domain=.thepiratebay.se', 'VARY': 'Accept-Encoding', 'CF-RAY': '269895ee61921944-HKG', 'CONTENT-ENCODING': 'gzip')>

Unclosed connection
client_connection: Connection<('thepiratebay.se', 443, True)>
Unclosed response
client_response: <ClientResponse(https://thepiratebay.se/search/ubuntu/0/7/0) [200 OK]>
<CIMultiDictProxy('SERVER': 'cloudflare-nginx', 'DATE': 'Sun, 24 Jan 2016 03:17:30 GMT', 'CONTENT-TYPE': 'text/html; charset=UTF-8', 'TRANSFER-ENCODING': 'chunked', 'CONNECTION': 'keep-alive', 'SET-COOKIE': 'PHPSESSID=1227bf9b240e1d057ea80b2605724913; path=/; domain=.thepiratebay.se', 'EXPIRES': 'Thu, 19 Nov 1981 08:52:00 GMT', 'CACHE-CONTROL': 'private, max-age=10800, pre-check=10800', 'LAST-MODIFIED': 'Sun, 15 Mar 2015 05:20:08 GMT', 'SET-COOKIE': 'language=en_EN; expires=Mon, 23-Jan-2017 03:17:32 GMT; path=/; domain=.thepiratebay.se', 'VARY': 'Accept-Encoding', 'CF-RAY': '269895ee7ae31944-HKG', 'CONTENT-ENCODING': 'gzip')>

Task exception was never retrieved
future: <Task finished coro=<print_magnet() done, defined at C:/Users/Marco/PycharmProjects/untitled3/crawler.py:25> exception=AttributeError("'ClientResponse' object has no attribute 'read_and_close'",)>
Traceback (most recent call last):
File "C:\Python34\lib\asyncio\tasks.py", line 236, in _step
result = coro.send(value)
File "C:/Users/Marco/PycharmProjects/untitled3/crawler.py", line 29, in print_magnet
page = yield from get(url, compress=True)
File "C:/Users/Marco/PycharmProjects/untitled3/crawler.py", line 10, in get
return (yield from response.read_and_close(decode=True))
AttributeError: 'ClientResponse' object has no attribute 'read_and_close'
Task exception was never retrieved
future: <Task finished coro=<print_magnet() done, defined at C:/Users/Marco/PycharmProjects/untitled3/crawler.py:25> exception=AttributeError("'ClientResponse' object has no attribute 'read_and_close'",)>
Traceback (most recent call last):
File "C:\Python34\lib\asyncio\tasks.py", line 236, in _step
result = coro.send(value)
File "C:/Users/Marco/PycharmProjects/untitled3/crawler.py", line 29, in print_magnet
page = yield from get(url, compress=True)
File "C:/Users/Marco/PycharmProjects/untitled3/crawler.py", line 10, in get
return (yield from response.read_and_close(decode=True))
AttributeError: 'ClientResponse' object has no attribute 'read_and_close'
Task exception was never retrieved
future: <Task finished coro=<print_magnet() done, defined at C:/Users/Marco/PycharmProjects/untitled3/crawler.py:25> exception=AttributeError("'ClientResponse' object has no attribute 'read_and_close'",)>
Traceback (most recent call last):
File "C:\Python34\lib\asyncio\tasks.py", line 236, in _step
result = coro.send(value)
File "C:/Users/Marco/PycharmProjects/untitled3/crawler.py", line 29, in print_magnet
page = yield from get(url, compress=True)
File "C:/Users/Marco/PycharmProjects/untitled3/crawler.py", line 10, in get
return (yield from response.read_and_close(decode=True))
AttributeError: 'ClientResponse' object has no attribute 'read_and_close'
Exception ignored in: Exception ignored in: Exception ignored in:

最佳答案

自 2015 年 12 月起,aiohttp 的 ClientResponse.read_and_close() 方法已被移除,您可以在 changelog 中找到相关说明。根据 readthedocs 站点上给出的示例,我认为只需修改下面这一行即可:

return (yield from response.read_and_close(decode=True))

改为:

return (yield from response.text())

readthedocs 页面上有很好的示例,只需记住语法会略有不同,因为您使用的是 Python 3.4:请使用 yield from 和 @asyncio.coroutine 装饰器来代替 await 和 async def,这样应该就没问题了。

关于python - python 爬虫不适用于 asyncio,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/34971826/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com