gpt4 book ai didi

Python QtWebKit 保存网页到文件

转载 作者:太空狗 更新时间:2023-10-30 02:34:36 25 4
gpt4 key购买 nike

将使用 QWebView() 显示的网页保存到文件的最佳和最简单方法是什么?

from PyQt4.QtCore import *
from PyQt4.QtWebKit import *
from PyQt4.QtGui import *
from PyQt4.QtScript import *
import sys
import time

# Path of the file the page markup is written to.
currentfile = "test.htm"
app = QApplication(sys.argv)
web = QWebView()
web.load(QUrl("http://news.google.com"))
web.show()
# NOTE(review): this line runs right after load() was requested and before
# app.exec_() starts the event loop. Page loading is asynchronous, so the
# document is most likely still empty here; the answer below waits for the
# loadFinished signal before reading the content.
data = web.page().currentFrame().documentElement().toInnerXml()
# NOTE(review): the file object is never closed explicitly.
open(currentfile,"w").write(data)
# Run the Qt event loop and use its return value as the process exit code.
sys.exit(app.exec_())

最佳答案

由于页面加载是异步的,您必须在尝试保存之前等待 loadFinished 信号。

然后您可以使用 web.page().currentFrame().toHtml() 检索页面内容,它返回一个 Python unicode 字符串,您可以使用 codecs 模块将其以 UTF-8 编码写入文件:

from PySide.QtCore import *
from PySide.QtGui import *
from PySide.QtWebKit import *
import sys
import codecs

class Downloader(QObject):
    """Download a list of pages one after another, saving each to a file.

    A single ``QWebPage`` is reused for every URL. Each ``loadFinished``
    signal triggers ``save``, which writes the page HTML to disk and then
    either starts the next download or emits ``done``.
    """

    # Emitted once every item has been processed (saved or skipped).
    done = Signal()

    def __init__(self, urlList, parent=None):
        """Store the work list and start the first download immediately.

        ``urlList`` is a sequence of ``(url, filename)`` pairs. It must
        contain at least one entry, because the first item is requested
        right here in the constructor.
        """
        super(Downloader, self).__init__(parent)
        self.urlList = urlList
        # Index of the entry currently being downloaded.
        self.counter = 0
        # The page does not need to be displayed, so a QWebPage is enough;
        # a QWebView is not required.
        self.page = QWebPage(self)
        self.page.loadFinished.connect(self.save)
        self.startNext()

    def currentUrl(self):
        """Return the URL of the entry currently being processed."""
        return self.urlList[self.counter][0]

    def currentFilename(self):
        """Return the output filename of the entry currently being processed."""
        return self.urlList[self.counter][1]

    def startNext(self):
        """Begin loading the current entry's URL in the page's main frame."""
        # Single-argument parenthesised print behaves identically on
        # Python 2 and Python 3.
        print("Downloading %s..." % self.currentUrl())
        self.page.mainFrame().load(self.currentUrl())

    def save(self, ok):
        """Handle ``loadFinished``: save the page if it loaded, then advance.

        ``ok`` is the boolean delivered by the ``loadFinished`` signal. When
        it is false the current entry is skipped without writing a file.
        """
        if ok:
            data = self.page.mainFrame().toHtml()
            # toHtml() returns a unicode string; codecs.open encodes it as
            # UTF-8 on write and the with-block closes the file.
            with codecs.open(self.currentFilename(), encoding="utf-8", mode="w") as f:
                f.write(data)
            print("Saving %s to %s." % (self.currentUrl(), self.currentFilename()))
        else:
            print("Error while downloading %s\nSkipping." % self.currentUrl())
        # Advance regardless of success so one failing URL cannot stall
        # the rest of the queue.
        self.counter += 1
        if self.counter < len(self.urlList):
            self.startNext()
        else:
            self.done.emit()

# (url, output filename) pairs handed to the Downloader.
urlList = [
    ("http://news.google.com", "google.html"),
    ("http://www.stackoverflow.com", "stack.html"),
    ("http://www.imdb.com", "imdb.html"),
]

app = QApplication(sys.argv)
downloader = Downloader(urlList)
# Leave the event loop as soon as the downloader reports completion.
downloader.done.connect(app.quit)

# Optional viewer that shows the page currently being fetched.
web = QWebView()
# Disabled so that user interaction cannot interrupt the load in progress.
web.setDisabled(True)
web.setPage(downloader.page)
web.show()

# Run the event loop and propagate its return value as the exit status.
exit_code = app.exec_()
sys.exit(exit_code)

关于Python QtWebKit 保存网页到文件,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/7433232/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com