gpt4 book ai didi

python - PyQt 类在第二次使用时无法正常工作

转载 作者:太空狗 更新时间:2023-10-30 01:08:29 25 4
gpt4 key购买 nike

我正在使用 PyQt 完整加载一个页面(包括 JS),然后用 Beautiful Soup 获取它的内容。第一次迭代时一切正常,但从第二次开始程序就崩溃了。我对 Python 了解不多,对 PyQt 了解得更少,因此非常欢迎任何帮助。

下面的 Render 类借用自这里(here,原文此处为链接)。

from PyQt4.QtCore import QUrl, SIGNAL
from PyQt4.QtGui import QApplication
from PyQt4.QtWebKit import QWebPage

from bs4 import BeautifulSoup
from bs4.dammit import UnicodeDammit
import sys
import signal


class Render(QWebPage):
    """Load one URL in a ``QWebPage`` and keep the rendered result.

    Constructing an instance blocks: ``__init__`` starts a Qt event loop and
    only returns after the ``loadFinished(bool)`` handler has quit that loop.
    Afterwards ``html`` holds the main frame's HTML and ``soup`` holds the
    ``BeautifulSoup`` tree built from it.

    NOTE: every instance constructs its own ``QApplication`` and runs its own
    event loop. The text accompanying this snippet reports that creating a
    second instance in the same process crashes; this is left unchanged here
    because it is the behaviour being discussed.
    """

    def __init__(self, url):
        """Start loading ``url`` and block until loading has finished.

        Args:
            url: Address of the page to load, as a string.
        """
        # The application object is created before the QWebPage base class
        # is initialised, in the same order as the original snippet.
        self.app = QApplication(sys.argv)
        QWebPage.__init__(self)
        self.html = None
        # Reset SIGINT to the default handler while the Qt loop is running.
        signal.signal(signal.SIGINT, signal.SIG_DFL)
        self.connect(self, SIGNAL('loadFinished(bool)'), self._finished_loading)
        self.mainFrame().load(QUrl(url))
        # Blocks here until _finished_loading() calls self.app.quit().
        self.app.exec_()

    def _finished_loading(self, result):
        """Store the loaded page as HTML and soup, then stop the event loop.

        Args:
            result: Flag delivered by ``loadFinished(bool)``; it is not
                inspected, so the frame content is stored either way.
        """
        self.html = self.mainFrame().toHtml()
        self.soup = BeautifulSoup(UnicodeDammit(self.html).unicode_markup)
        self.app.quit()

###################################################################


# URLs to render; one Render instance is created per entry.
l = [
    "http://www.google.com/?q=a",
    "http://www.google.com/?q=b",
    "http://www.google.com/?q=c",
]

for page in l:
    # Render(...) blocks until the page has loaded, then exposes .soup.
    soup = Render(page).soup
    print("# soup done: " + page)

enter image description here

最佳答案

该示例之所以崩溃,是因为 Render 类会为它要加载的每个 url 都创建一个新的 QApplication 和事件循环。

正确的做法是只创建一个 QApplication,并让 QWebPage 子类在处理完每个页面之后再加载下一个 url,而不是使用 for 循环。

这是一个重写的例子,它应该做你想做的事:

import sys, signal
from bs4 import BeautifulSoup
from bs4.dammit import UnicodeDammit
from PyQt4 import QtCore, QtGui, QtWebKit

class WebPage(QtWebKit.QWebPage):
    """Load a sequence of URLs one after another in a single web page.

    Each work item is a ``(url, func)`` pair. When a page has finished
    loading, ``func(url, html)`` is called with the main frame's HTML and the
    next item is started. When no items remain, the application is asked to
    quit.
    """

    def __init__(self):
        QtWebKit.QWebPage.__init__(self)
        # State of the item currently being processed; set by process() and
        # fetchNext().
        self._items = iter(())
        self._url = None
        self._func = None
        self.mainFrame().loadFinished.connect(self.handleLoadFinished)

    def process(self, items):
        """Start processing ``items``, an iterable of ``(url, func)`` pairs."""
        self._items = iter(items)
        if not self.fetchNext():
            # Nothing to load, so loadFinished will never fire. Schedule the
            # shutdown for when the event loop is running, so that an empty
            # ``items`` does not leave the application running forever.
            QtCore.QTimer.singleShot(0, self._finish)

    def fetchNext(self):
        """Begin loading the next item.

        Returns:
            True if a load was started, False if the items are exhausted.
        """
        # Only next() can signal exhaustion, so keep the try body minimal.
        try:
            self._url, self._func = next(self._items)
        except StopIteration:
            return False
        self.mainFrame().load(QtCore.QUrl(self._url))
        return True

    def handleLoadFinished(self):
        """Pass the loaded page to its callback, then move to the next item."""
        self._func(self._url, self.mainFrame().toHtml())
        if not self.fetchNext():
            self._finish()

    def _finish(self):
        """Report completion and ask the application to quit."""
        print('# processing complete')
        QtGui.qApp.quit()


def funcA(url, html):
    """Example page callback: report which URL is being processed.

    Args:
        url: Address of the page that was loaded.
        html: Page content; unused by this placeholder.
    """
    print('# processing:', url)
    # A real handler would parse the page here, for example:
    # soup = BeautifulSoup(UnicodeDammit(html).unicode_markup)
    # ...and then work with the soup.

def funcB(url, html):
    """Example page callback: report which URL is being processed.

    Args:
        url: Address of the page that was loaded.
        html: Page content; unused by this placeholder.
    """
    print('# processing:', url)
    # A real handler would parse the page here, for example:
    # soup = BeautifulSoup(UnicodeDammit(html).unicode_markup)
    # ...and then work with the soup.

if __name__ == '__main__':

    # (url, callback) pairs; each callback receives the url and its HTML.
    items = [
        ('http://stackoverflow.com', funcA),
        ('http://google.com', funcB),
    ]

    # Reset SIGINT to the default handler so Ctrl+C (advertised below) is
    # not held up by the Qt event loop.
    signal.signal(signal.SIGINT, signal.SIG_DFL)
    print('Press Ctrl+C to quit\n')
    # A single QApplication and event loop serve every page load.
    app = QtGui.QApplication(sys.argv)
    webpage = WebPage()
    webpage.process(items)
    sys.exit(app.exec_())

关于“python - PyQt 类在第二次使用时无法正常工作”,我们在 Stack Overflow 上找到一个类似的问题: https://stackoverflow.com/questions/21909907/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com