gpt4 book ai didi

python - Python headless QtWebKit 浏览器中多个网页的屏幕截图

转载 作者:太空狗 更新时间:2023-10-30 02:48:41 27 4
gpt4 key购买 nike

我正在尝试渲染多个网页并对它们截图,但只有在渲染单个网页时才能正常工作。当我尝试处理多个网页时,程序要么中途停住并永远挂起,要么完全不处理图片和 CSS,只是提取网站的文本并把它们堆成一长段文本块。通常发生的情况是程序挂起。

我用来在内存中呈现网页的代码是这样的:

class Render(QWebPage):
    """Web page that loads ``url`` inside its own blocking Qt event loop.

    The constructor creates a QApplication, starts loading the page and
    then blocks in ``exec_()`` until ``_loadFinished`` quits the
    application. After construction ``self.frame`` holds the main frame
    and the viewport has been resized to the page contents.
    """

    def __init__(self, url):
        # A new QApplication is created for every Render instance.
        self.app = QApplication(sys.argv)
        QWebPage.__init__(self)

        # Page settings
        page_settings = self.settings()
        #page_settings.setAttribute(QWebSettings.AutoLoadImages, False)
        page_settings.setAttribute(QWebSettings.JavascriptCanOpenWindows, False)
        page_settings.setAttribute(QWebSettings.PluginsEnabled, True)

        self.loadFinished.connect(self._loadFinished)

        main_frame = self.mainFrame()
        main_frame.load(QUrl(url))
        #main_frame.setScrollBarPolicy(Qt.Vertical, Qt.ScrollBarAlwaysOff)
        main_frame.setScrollBarPolicy(Qt.Horizontal, Qt.ScrollBarAlwaysOff)

        # Blocks here until _loadFinished() calls quit() on the application.
        self.app.exec_()

    def _loadFinished(self, result):
        """Size the viewport to the loaded content and stop the event loop.

        ``result`` is the value delivered by the ``loadFinished`` signal;
        it is not inspected.
        """
        self.frame = self.mainFrame()

        # Keep the content height, but force the width to 1366 pixels.
        content_size = self.frame.contentsSize()
        content_size.setWidth(1366)
        self.setViewportSize(content_size)

        self.app.quit()

下面是我保存图片的方式:

def run(url):
    """Render ``url`` and save it as ``<safe name>.png`` inside ``output``.

    The working directory is switched to ``output`` for the duration of
    the call and moved back up one level afterwards.
    """
    os.chdir("output")

    page = Render(url)

    # Paint the loaded frame onto an in-memory image the size of the viewport.
    canvas = QImage(page.viewportSize(), QImage.Format_ARGB32)
    brush = QPainter(canvas)
    page.frame.render(brush)
    brush.end()

    target = "%s.png" % os_safe_name(url)
    canvas.save(target)

    os.chdir("..")

有人知道为什么会这样吗?

最佳答案

与 Luke 的回答中描述的做法基本一致,我改动了一些地方,以避免为每个 Render 都创建一个 QApplication 实例。

不是最整洁的,但对我有用:

import re
import sys
import time

# Tested with PySide 1.0.9, changing imports to PyQt should work identically
from PySide.QtCore import Qt, QUrl
from PySide.QtGui import QApplication, QImage, QPainter
from PySide.QtWebKit import QWebPage, QWebSettings


def os_safe_name(url):
    """Return ``url`` reduced to characters that are safe in a file name.

    Every run of characters other than ASCII letters, digits, ``_`` and
    ``-`` becomes a single underscore, and any resulting sequence of two
    or more underscores is collapsed to one.
    """
    # Pass 1: replace each run of disallowed characters with "_".
    squashed = re.sub("[^a-zA-Z0-9_-]+", "_", url)
    # Pass 2: merge adjacent underscores (pre-existing or just inserted).
    return re.sub("_{2,}", "_", squashed)


class Render(QWebPage):
    """Load ``url`` and save a screenshot to ``output/<safe name>.png``.

    The page does not create or run a QApplication itself: the caller is
    expected to own one and to process Qt events until ``finished``
    becomes True.

    Attributes:
        url: The address passed to the constructor.
        finished: False until the ``loadFinished`` callback has run.
        loaded: Value reported by the ``loadFinished`` signal (None until
            the callback runs).
        saved: Return value of ``QImage.save`` (False until the image has
            been written successfully).
        filepath: Path of the PNG file, or None until the callback runs.
    """

    def __init__(self, url):
        QWebPage.__init__(self)

        self.url = url
        self.finished = False
        self.loaded = None
        self.saved = False
        self.filepath = None

        # Settings
        s = self.settings()
        #s.setAttribute(QWebSettings.AutoLoadImages, False)
        s.setAttribute(QWebSettings.JavascriptCanOpenWindows, False)
        s.setAttribute(QWebSettings.PluginsEnabled, True)

        #self.mainFrame().setScrollBarPolicy(Qt.Vertical, Qt.ScrollBarAlwaysOff)
        self.mainFrame().setScrollBarPolicy(Qt.Horizontal, Qt.ScrollBarAlwaysOff)

        # When page is loaded, callback saves image to file
        self.loadFinished.connect(self._loadFinished)

        self.mainFrame().load(QUrl(url))

    def _loadFinished(self, result):
        """Render the loaded page into a PNG file and mark the page finished.

        ``result`` is the flag delivered by the ``loadFinished`` signal. It
        is recorded in ``self.loaded``; the image is written either way, as
        before.
        """
        try:
            self.loaded = bool(result)

            # Keep the content height, but force the width to 1366 pixels.
            frame = self.mainFrame()
            size = frame.contentsSize()
            size.setWidth(1366)
            self.setViewportSize(size)

            image = QImage(self.viewportSize(), QImage.Format_ARGB32)
            # QImage(size, format) leaves the pixel data uninitialised, so
            # areas the page does not paint would contain garbage. Start
            # from opaque white; the raw pixel value is used because it is
            # accepted by every QImage.fill() overload set.
            image.fill(0xFFFFFFFF)

            painter = QPainter(image)
            frame.render(painter)
            painter.end()

            self.filepath = "output/%s.png" % os_safe_name(self.url)
            # save() reports failure through its return value, not an
            # exception, so keep the result for the caller.
            self.saved = bool(image.save(self.filepath))
        finally:
            # Always release the caller's polling loop, even if rendering
            # or saving raised.
            self.finished = True


def run(url, app=None, timeout=None):
    """Render ``url`` to a PNG file and return the file's path.

    Args:
        url: Address of the page to capture.
        app: QApplication whose event loop should be pumped. When omitted,
            the already-existing application instance is reused, and one
            is created only if none exists yet.
        timeout: Optional maximum number of seconds to wait for the page
            to finish. ``None`` (the default) waits indefinitely, as
            before.

    Returns:
        The ``filepath`` attribute of the finished ``Render`` object.

    Raises:
        RuntimeError: If ``timeout`` is given and the page has not
            finished within that many seconds.
    """
    if app is None:
        # Only one QApplication may exist per process, so reuse the
        # current one instead of constructing a second instance.
        app = QApplication.instance()
    if app is None:
        app = QApplication(sys.argv)

    r = Render(url)

    deadline = None
    if timeout is not None:
        deadline = time.time() + timeout

    # Pump Qt events by hand so the same application can serve many pages.
    while not r.finished:
        if deadline is not None and time.time() > deadline:
            raise RuntimeError(
                "Timed out after %s seconds while loading %s" % (timeout, url))
        app.processEvents()
        time.sleep(0.01)

    return r.filepath


if __name__ == '__main__':
    # A single QApplication is created up front and shared by every
    # run() call.
    app = QApplication(sys.argv)

    # The parenthesised single-argument print produces the same output on
    # Python 2 and is also valid on Python 3.
    for url in ("http://stackoverflow.com", "http://google.com"):
        print(run(url, app=app))

关于python - Python headless QtWebKit 浏览器中多个网页的屏幕截图,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/11903375/

27 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com