gpt4 book ai didi

python - 如何使用 asyncio 使我的程序异步?

转载 作者:行者123 更新时间:2023-11-30 22:23:05 25 4
gpt4 key购买 nike

这是我的代码:

import urllib.parse  # BUG FIX: `import urllib` alone does not guarantee the
                     # `urllib.parse` submodule is loaded; import it explicitly.
import webbrowser
from bs4 import BeautifulSoup
import requests
import re

# Build a Google search URL from the OCR'd question, score three candidate
# answers by how often they appear in the result blocks (and, when none of
# them appears in a block, in the paragraphs of the block's linked page),
# then write and print the tallies.

address = 'https://google.com/search?q='
# Default Google search address start

# Open the text document that contains the question.
with open("OCR.txt", "rt") as file:
    word = file.read()

# The question is on multiple lines, so join them with proper spacing.
myList = [item for item in word.split('\n')]
newString = ' '.join(myList)

# Encode the string for use in a URL query.
qstr = urllib.parse.quote_plus(newString)

# Combine the base and the encoded query.
newWord = address + qstr

response = requests.get(newWord)

#with open('output.html', 'wb') as f:
#    f.write(response.content)
#webbrowser.open('output.html')

# Candidate answers sit on lines 0, 2 and 4 of ocr2.txt
# (presumably every other line is blank — TODO confirm against the file).
with open("ocr2.txt", "rt") as answers:
    ansTable = answers.read()

ans = ansTable.splitlines()

ans1 = str(ans[0])
ans2 = str(ans[2])
ans3 = str(ans[4])

ans1Score = 0
ans2Score = 0
ans3Score = 0

links = []

soup = BeautifulSoup(response.text, 'lxml')

# Collect the result URL of every 'r' element, stripping query parameters.
for r in soup.find_all(class_='r'):
    linkRaw = str(r)
    link = re.search(r"(?P<url>https?://[^\s]+)", linkRaw).group("url")
    if '&' in link:
        finalLink = link.split('&')
        link = str(finalLink[0])
    links.append(link)

#print(links)
#print(' ')

for g in soup.find_all(class_='g'):
    webBlock = str(g)

    ans1Tally = webBlock.count(ans1)
    ans2Tally = webBlock.count(ans2)
    ans3Tally = webBlock.count(ans3)

    # BUG FIX: the found-flags were only assigned in `else` branches, so the
    # first iteration could raise NameError and later iterations reused stale
    # values. Recompute all three flags for every block.
    ans1Found = ans1 in webBlock
    ans2Found = ans2 in webBlock
    ans3Found = ans3 in webBlock

    if ans1Found:
        ans1Score += ans1Tally
    if ans2Found:
        ans2Score += ans2Tally
    if ans3Found:
        ans3Score += ans3Tally

    # BUG FIX: `ans1Found and ans2Found and ans3Found is False` parsed as
    # `ans1Found and ans2Found and (ans3Found is False)`; the intent is
    # "none of the answers appeared in this block".
    if not (ans1Found or ans2Found or ans3Found):
        searchLink = str(links[0])

        # Skip PDF links — they cannot be parsed as HTML.
        if not searchLink.endswith('pdf'):
            response2 = requests.get(searchLink)
            soup2 = BeautifulSoup(response2.text, 'lxml')

            for p in soup2.find_all('p'):
                extraBlock = str(p)

                extraAns1Tally = extraBlock.count(ans1)
                # BUG FIX: was assigned as `extraAns2tally` (lower-case t)
                # but read back as `extraAns2Tally` → NameError.
                extraAns2Tally = extraBlock.count(ans2)
                extraAns3Tally = extraBlock.count(ans3)

                if ans1 in extraBlock:
                    ans1Score += extraAns1Tally
                if ans2 in extraBlock:
                    ans2Score += extraAns2Tally
                if ans3 in extraBlock:
                    ans3Score += extraAns3Tally

    # Results.txt is rewritten after every block with the running totals.
    with open("Results.txt", "w") as results:
        results.write(newString + '\n\n')
        results.write(ans1 + ": " + str(ans1Score) + '\n')
        results.write(ans2 + ": " + str(ans2Score) + '\n')
        results.write(ans3 + ": " + str(ans3Score))

    # Each 'g' block corresponds to the current front link; drop it once done.
    links.pop(0)

print(' ')
print('-----')
print(ans1 + ": " + str(ans1Score))
print(ans2 + ": " + str(ans2Score))
print(ans3 + ": " + str(ans3Score))
print('-----')

基本上,现在它一次抓取每个“g”一个,而该程序可以通过同时抓取每个链接而获益匪浅。例如,我希望它们同时进行抓取,而不是等到抓取完成后再进行。抱歉,如果这是一个简单的问题,但我对 asyncio 的经验很少,所以如果有人可以提供帮助,我们将不胜感激。谢谢!

最佳答案

要编写异步程序,您需要:

  • 使用async def定义函数
  • 使用await调用它
  • 创建事件循环并在其中运行一些函数
  • 使用 asyncio.gather 同时运行请求

其他几乎和平常一样。您应该使用一些异步模块,而不是使用阻塞的 requests 模块。例如,aiohttp :

python -m pip install aiohttp

并像这样使用它:

async def get(url):
    """Fetch *url* with aiohttp and return the response body as text."""
    async with aiohttp.ClientSession() as session:
        # BUG FIX: the original ignored the `url` parameter and always
        # fetched the hard-coded 'https://api.github.com/events' endpoint.
        async with session.get(url) as resp:
            return await resp.text()
<hr/>

这是我进行了一些更改的代码。我没有检查它是否真的有效,因为我没有您使用的文件。您还应该将 for g in soup.find_all(class_='g'): 的逻辑移至单独的函数,并使用 asyncio.gather 运行多个这些函数,以受益于异步。

import asyncio
import aiohttp
import urllib
import webbrowser
from bs4 import BeautifulSoup
import re


async def get(url):
    """Fetch *url* with aiohttp and return the response body as text."""
    async with aiohttp.ClientSession() as session:
        # BUG FIX: the original ignored the `url` parameter and always
        # fetched the hard-coded 'https://api.github.com/events' endpoint.
        async with session.get(url) as resp:
            return await resp.text()


async def main():
    """Run the whole search-and-score pipeline inside the event loop.

    Reads the question from OCR.txt and the candidate answers from ocr2.txt,
    fetches the Google results page via the async `get` helper, tallies how
    often each answer appears, and writes/prints the scores.
    """
    address = 'https://google.com/search?q='
    # Default Google search address start

    # Open the text document that contains the question.
    with open("OCR.txt", "rt") as file:
        word = file.read()

    # The question is on multiple lines, so join them with proper spacing.
    myList = [item for item in word.split('\n')]
    newString = ' '.join(myList)

    # Encode the string for use in a URL query.
    qstr = urllib.parse.quote_plus(newString)

    # Combine the base and the encoded query.
    newWord = address + qstr

    text = await get(newWord)

    #with open('output.html', 'wb') as f:
    #    f.write(response.content)
    #webbrowser.open('output.html')

    # Candidate answers sit on lines 0, 2 and 4 of ocr2.txt
    # (presumably every other line is blank — TODO confirm against the file).
    with open("ocr2.txt", "rt") as answers:
        ansTable = answers.read()

    ans = ansTable.splitlines()

    ans1 = str(ans[0])
    ans2 = str(ans[2])
    ans3 = str(ans[4])

    ans1Score = 0
    ans2Score = 0
    ans3Score = 0

    links = []

    soup = BeautifulSoup(text, 'lxml')

    # Collect the result URL of every 'r' element, stripping query parameters.
    for r in soup.find_all(class_='r'):
        linkRaw = str(r)
        link = re.search(r"(?P<url>https?://[^\s]+)", linkRaw).group("url")
        if '&' in link:
            finalLink = link.split('&')
            link = str(finalLink[0])
        links.append(link)

    #print(links)
    #print(' ')

    for g in soup.find_all(class_='g'):
        webBlock = str(g)

        ans1Tally = webBlock.count(ans1)
        ans2Tally = webBlock.count(ans2)
        ans3Tally = webBlock.count(ans3)

        # BUG FIX: the found-flags were only assigned in `else` branches, so
        # the first iteration could raise NameError and later iterations
        # reused stale values. Recompute all three flags for every block.
        ans1Found = ans1 in webBlock
        ans2Found = ans2 in webBlock
        ans3Found = ans3 in webBlock

        if ans1Found:
            ans1Score += ans1Tally
        if ans2Found:
            ans2Score += ans2Tally
        if ans3Found:
            ans3Score += ans3Tally

        # BUG FIX: `ans1Found and ans2Found and ans3Found is False` parsed as
        # `ans1Found and ans2Found and (ans3Found is False)`; the intent is
        # "none of the answers appeared in this block".
        if not (ans1Found or ans2Found or ans3Found):
            searchLink = str(links[0])

            # Skip PDF links — they cannot be parsed as HTML.
            if not searchLink.endswith('pdf'):
                text2 = await get(searchLink)
                soup2 = BeautifulSoup(text2, 'lxml')

                for p in soup2.find_all('p'):
                    extraBlock = str(p)

                    extraAns1Tally = extraBlock.count(ans1)
                    # BUG FIX: was assigned as `extraAns2tally` (lower-case
                    # t) but read back as `extraAns2Tally` → NameError.
                    extraAns2Tally = extraBlock.count(ans2)
                    extraAns3Tally = extraBlock.count(ans3)

                    if ans1 in extraBlock:
                        ans1Score += extraAns1Tally
                    if ans2 in extraBlock:
                        ans2Score += extraAns2Tally
                    if ans3 in extraBlock:
                        ans3Score += extraAns3Tally

        # Results.txt is rewritten after every block with the running totals.
        with open("Results.txt", "w") as results:
            results.write(newString + '\n\n')
            results.write(ans1 + ": " + str(ans1Score) + '\n')
            results.write(ans2 + ": " + str(ans2Score) + '\n')
            results.write(ans3 + ": " + str(ans3Score))

        # Each 'g' block corresponds to the current front link; drop it.
        links.pop(0)

    print(' ')
    print('-----')
    print(ans1 + ": " + str(ans1Score))
    print(ans2 + ": " + str(ans2Score))
    print(ans3 + ": " + str(ans3Score))
    print('-----')


if __name__ == '__main__':
    # asyncio.run() (Python 3.7+, same version the async/await syntax here
    # requires) creates the event loop, runs main(), shuts down async
    # generators and closes the loop — exactly the manual
    # get_event_loop()/run_until_complete()/shutdown_asyncgens()/close()
    # sequence it replaces. get_event_loop() outside a running loop is
    # deprecated since Python 3.10.
    asyncio.run(main())
<hr/>

更新:

主要思想是将请求的循环内的逻辑移动到单独的协程中,并将多个协程传递给 asyncio.gather。它将并行化您的请求。

async def main():
    # Here do everything that came before the loop.

    # One coroutine per 'g' element, replacing the sequential loop body.
    coros = [process_single_g(g) for g in soup.find_all(class_='g')]

    # asyncio.gather runs the tasks concurrently and returns all their
    # results together, in order.
    results = await asyncio.gather(*coros)

    for res in results:
        ans1Score, ans2Score, ans3Score = res

        print(' ')
        print('-----')
        print(ans1 + ": " + str(ans1Score))
        print(ans2 + ": " + str(ans2Score))
        print(ans3 + ": " + str(ans3Score))
        print('-----')



async def process_single_g(g):
    # Here do everything the original loop body did for one concrete g.

    text2 = await get(searchLink)

    # ...

    return ans1Score, ans2Score, ans3Score

关于python - 如何使用 asyncio 使我的程序异步?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/48133838/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com