gpt4 book ai didi

Python Webdriver 多线程

转载 作者:太空狗 更新时间:2023-10-30 02:57:17 24 4
gpt4 key购买 nike

我正在尝试参照这篇文章中的代码来生成多个 webdriver 实例:http://www.ibm.com/developerworks/aix/library/au-threadingpython/

import time
import Queue
import urllib2
import threading
from selenium import webdriver
from BeautifulSoup import BeautifulSoup
# URLs that main() places on the work queue.
hosts = ["http://yahoo.com", "http://google.com", "http://amazon.com",
"http://ibm.com", "http://apple.com"]
# NOTE(review): there are no call parentheses here, so both names are bound to
# the Queue class itself rather than to queue instances; later calls such as
# get()/put() are therefore made on the class.
queue = Queue.Queue
out_queue = Queue.Queue

# Worker thread: takes a host from `queue`, loads it with a webdriver and puts
# the page source on `out_queue`.
class Login_Driver(threading.Thread):
# Stores both queues and the webdriver on the instance, then prints the
# driver's current title.
def __init__(self, queue, out_queue, driver):
threading.Thread.__init__(self)
self.queue = queue
self.out_queue = out_queue
self.driver = driver
print driver.title
# Loops forever; each iteration handles one host taken from self.queue.
def run(self):
while True:
#grabs host from queue
host = self.queue.get()
#grabs urls of hosts and then grabs chunk of webpage
# NOTE(review): bare `driver` is neither a local nor an attribute here;
# __init__ stored the webdriver as self.driver.
driver.get(host)
# NOTE(review): page_source is invoked as a method; in Selenium it is
# documented as a property -- confirm against the webdriver API.
chunk = driver.page_source()
#place chunk into out queue
self.out_queue.put(chunk)
#signals to queue job is done
self.queue.task_done()
# Consumer thread: takes page-source chunks from `out_queue`, parses them with
# BeautifulSoup and prints the <title> elements found.
class Poster(threading.Thread):
# Stores the queue and the webdriver, then prints the driver's name.
# NOTE(review): threading.Thread.__init__ is not called in this constructor,
# unlike Login_Driver.__init__.
def __init__(self, driver, out_queue):
self.out_queue = out_queue
self.driver = driver
print driver.name
# Loops forever; each iteration handles one chunk taken from self.out_queue.
def run(self):
while True:
#grabs a page-source chunk from the out queue
chunk = self.out_queue.get()
#parse the chunk
soup = BeautifulSoup(chunk)
print soup.findAll(['title'])
#signals to queue job is done
self.out_queue.task_done()
# Timestamp taken at module load; used for the elapsed-time report printed
# after main() returns.
start = time.time()
# Starts five Login_Driver threads (one Firefox webdriver each), enqueues every
# entry of `hosts`, starts five Poster threads, then joins both queues.
def main():
#spawn a pool of threads, and pass them queue instance
for i in range(5):
driver = webdriver.Firefox()
t = Login_Driver(queue, out_queue, driver)
# daemon threads do not keep the interpreter alive once main() returns
t.setDaemon(True)
t.start()
# fixed 20 second pause before any host is queued
time.sleep(20)
#populate queue with data
for host in hosts:
queue.put(host)
for i in range(5):
# NOTE(review): Poster.__init__ is declared as (self, driver, out_queue) but
# only one argument is passed here.
dt = Poster(out_queue)
dt.setDaemon(True)
dt.start()
#wait on the queue until everything has been processed
queue.join()
out_queue.join()
# Run the whole pipeline at import/execution time, then report how long it took
# relative to the module-level `start` timestamp.
main()
print "Elapsed Time: %s" % (time.time() - start)

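错误:TypeError: unbound method get() must be called with Queue instance as first argument (got nothing instead),即:未绑定(unbound)方法 get() 必须以 Queue 实例作为第一个参数来调用(实际上没有传入任何参数)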

我在线程、类、进程方面还是新手。您能告诉我哪一种更适合这个场景吗——线程还是进程?如果能给我一个例子就太好了。谢谢大家。

更新

工作代码:

import time
import Queue
import urllib2
import threading
from selenium import webdriver
from BeautifulSoup import BeautifulSoup

# Sites whose pages are loaded by the worker threads.
hosts = [
    "http://yahoo.com",
    "http://google.com",
    "http://amazon.com",
    "http://ibm.com",
    "http://apple.com",
]

# Queue of host URLs waiting to be loaded by a Login_Driver thread.
queue = Queue.Queue()
# Queue of page sources waiting to be parsed by a Poster thread.
out_queue = Queue.Queue()

class Login_Driver(threading.Thread):
    """Worker thread that loads hosts in a webdriver and forwards the page source.

    Each instance repeatedly takes a host URL from ``queue``, loads it with
    its webdriver and puts the resulting page source on ``out_queue``.
    """

    def __init__(self, queue, out_queue, driver):
        """Store the queues and the webdriver this worker operates on.

        Args:
            queue: Queue of host URLs to load.
            out_queue: Queue that receives the page source of each loaded host.
            driver: Webdriver-like object providing ``get()``, ``page_source``
                and ``title``.
        """
        threading.Thread.__init__(self)
        self.queue = queue
        self.out_queue = out_queue
        self.driver = driver
        print("In init first class..")

    def run(self):
        """Process hosts from the queue forever.

        ``task_done()`` is called for every host taken from the queue, even
        when loading it raises, so a caller blocked in ``queue.join()`` is
        never left waiting on a host that failed.  A failure is reported and
        the worker carries on with the next host instead of dying.
        """
        while True:
            host = self.queue.get()
            try:
                self.driver.get(host)
                chunk = self.driver.page_source
                self.out_queue.put(chunk)
                print(self.driver.title)
            except Exception as exc:
                # Thread top level: report the failure and keep serving the
                # queue; a dead worker would strand the remaining hosts.
                print("Failed to load %s: %s" % (host, exc))
            finally:
                # Always balance the get() above so queue.join() can return.
                self.queue.task_done()
class Poster(threading.Thread):
    """Consumer thread that parses page sources and prints their titles.

    Each instance repeatedly takes a page-source chunk from ``out_queue``,
    parses it with BeautifulSoup, prints the ``<title>`` elements found and
    then prints the name of its webdriver.
    """

    def __init__(self, out_queue, driver):
        """Store the queue to consume and the webdriver to report on.

        Args:
            out_queue: Queue of page-source strings to parse.
            driver: Webdriver-like object providing a ``name`` attribute.
        """
        threading.Thread.__init__(self)
        self.out_queue = out_queue
        self.driver = driver
        print("In init a second class..")

    def run(self):
        """Process chunks from the queue forever.

        ``task_done()`` is called for every chunk taken from the queue, even
        when parsing or printing raises, so a caller blocked in
        ``out_queue.join()`` is never left waiting on a chunk that failed.
        """
        while True:
            chunk = self.out_queue.get()
            try:
                soup = BeautifulSoup(chunk)
                print(soup.findAll(['title']))
                print(self.driver.name)
            except Exception as exc:
                # Thread top level: report the failure and keep consuming;
                # a dead consumer would strand the remaining chunks.
                print("Failed to parse page: %s" % (exc,))
            finally:
                # Always balance the get() above so out_queue.join() can return.
                self.out_queue.task_done()
# Timestamp taken at module load; used for the elapsed-time report printed
# after main() returns.
start = time.time()
def main(num_workers=5):
    """Load every entry of ``hosts`` in Firefox and print each page's title.

    Starts ``num_workers`` Login_Driver threads (one Firefox webdriver each)
    and the same number of Poster threads, feeds ``hosts`` into ``queue`` and
    blocks until both ``queue`` and ``out_queue`` have been fully processed.
    Every webdriver that was started is quit before returning, including
    when start-up or processing raises, so no browser is left running.

    Args:
        num_workers: Number of webdriver/loader threads and of parser
            threads to start.  Defaults to 5, the previously fixed count.
    """
    drivers = []
    try:
        # Spawn the loader threads, each with its own browser instance.
        for i in range(num_workers):
            driver = webdriver.Firefox()
            drivers.append(driver)
            t = Login_Driver(queue, out_queue, driver)
            # Daemon threads do not keep the interpreter alive at exit.
            t.setDaemon(True)
            t.start()
            print("Started webdriver: --- "+str(i)+" --- from main")
        print("All started")
        time.sleep(3)

        # Populate the work queue.
        for host in hosts:
            queue.put(host)
            print("Opening website: "+host)
        print("All sites passed for opening..")
        time.sleep(3)

        # Spawn the parser threads; each one reports on its own browser
        # rather than all of them sharing whichever was created last.
        for i, driver in enumerate(drivers):
            dt = Poster(out_queue, driver)
            dt.setDaemon(True)
            dt.start()
            print("Starting second class/title and name beautifull soup and webdriver: --- "+str(i)+" --- from main")
        print("Started secound class..")
        time.sleep(3)

        # Wait until every host was loaded and every page was parsed.
        queue.join()
        out_queue.join()
        print("out_queue.join()")
    finally:
        # The workers are idle (blocked on an empty queue) or never started
        # at this point, so the browsers can be shut down.
        for driver in drivers:
            try:
                driver.quit()
            except Exception as exc:
                # Best effort: one browser failing to close must not stop
                # the others from being closed.
                print("Failed to quit webdriver: %s" % (exc,))
# Run the pipeline, then report the wall-clock time since `start` was taken.
main()
print("Elapsed Time: %s" % (time.time() - start))

最佳答案

您没有正确地实例化队列。不应该写成:

# Binds the Queue class itself; no queue object is created.
queue = Queue.Queue
out_queue = Queue.Queue

应该是

# Calling the class creates the queue instances.
queue = Queue.Queue()
out_queue = Queue.Queue()

关于Python Webdriver 多线程,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/37615350/

24 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com