我一直在读到使用 Selenium 的 webdrivers 的 click() 是异步的,所以我一直试图让 webdriver 等待点击完成,然后再做任何其他事情。我使用 PhantomJS 作为浏览器。
我有 WebDriverWait 对象来等待页面上的元素发生更改(这就是我在单击某些内容后判断页面是否已加载/更改的方式)。我的问题是我不断从 WebDriverWait 收到 TimeoutExceptions。
点击某些内容后,我可以做些什么来等待页面加载吗?我不想使用 time.sleep(1)
因为加载时间似乎是可变的,而且我不希望它休眠太久。这就是为什么我想显式等待页面加载。
这是我的 webdriver 封装代码以及相应的等待逻辑:
import time
from bs4 import BeautifulSoup
from selenium import webdriver
import selenium.webdriver.support.ui as ui
import selenium.common.exceptions as exceptions
class Webdriver():
    """Thin wrapper around a PhantomJS webdriver supporting
    click-and-wait-for-change navigation."""

    def __init__(self, wait_time=10):
        # wait_time: maximum seconds WebDriverWait polls before raising
        # a selenium TimeoutException.
        self.driver = webdriver.PhantomJS()
        self.driver.set_window_size(1200, 800)
        self.wait = wait_time

    def click(self, element_xpath, wait_xpath, sleep_time=0):
        """Click *element_xpath*, then block until the element at
        *wait_xpath* changes (its text differs or the node goes stale).

        Raises a selenium TimeoutException if nothing changes within
        ``self.wait`` seconds.  Sleeps *sleep_time* seconds afterwards.
        """
        wait = ui.WebDriverWait(self.driver, self.wait)
        old_element = self.driver.find_element_by_xpath(wait_xpath)
        old_text = old_element.text
        self.driver.find_element_by_xpath(element_xpath).click()
        # BUG in original: the predicate handed to wait.until() was
        # element_changed(..., 20), which itself polls/blocks for up to
        # 20 s, so WebDriverWait's own 10 s timeout always fired first
        # (the reported TimeoutException).  A wait.until() predicate
        # must be a quick single check; WebDriverWait does the polling.
        wait.until(lambda d: self._changed(d, wait_xpath, old_text))
        time.sleep(sleep_time)

    @staticmethod
    def _changed(driver, xpath, old_text):
        # Single, non-blocking check suitable as a WebDriverWait predicate.
        try:
            return driver.find_element_by_xpath(xpath).text != old_text
        except exceptions.StaleElementReferenceException:
            # The watched node was replaced in the DOM -> page changed.
            return True
        except exceptions.NoSuchElementException:
            # Not rendered yet -> not changed yet.
            return False
def element_changed(driver, element_xpath, old_element_text, timeout_seconds=10):
    """Poll *element_xpath* until its text differs from *old_element_text*.

    Returns True as soon as the text changes or the node goes stale
    (both mean the page was updated); returns False if nothing changed
    within *timeout_seconds*.  A missing element is treated as "not
    changed yet" and polling continues.
    """
    poll_every = 1
    deadline = time.time() + timeout_seconds
    while time.time() < deadline:
        try:
            current = driver.find_element_by_xpath(element_xpath).text
        except exceptions.StaleElementReferenceException:
            # Old node was replaced in the DOM -> page changed.
            return True
        except exceptions.NoSuchElementException:
            # Element not rendered yet; keep waiting.
            current = old_element_text
        if current != old_element_text:
            return True
        time.sleep(poll_every)
    return False
这是运行的代码:
# Scrape the product links from every page of the AVR parametric table.
driver = Webdriver()
url = 'http://www.atmel.com/products/microcontrollers/avr/default.aspx?tab=parameters'
wait_xpath = '//*[@id="device-columns"]/tbody/tr[2]/td[1]/div[2]/a'

driver.load(url, wait_xpath)
soup = driver.get_soup()

# One pagination <a> per result page.
pages = soup('ul', class_='pagination')[0]('a')
num_pages = len(pages)

products = set()
for page_index in range(num_pages):
    # Pagination links live under li[2], li[3], ... in the top nav.
    element_xpath = '//*[@id="top-nav"]/div/ul/li[%d]/a' % (2 + page_index)
    driver.click(element_xpath, wait_xpath)
    soup = driver.get_soup()
    for cell in soup('td', class_='first-cell'):
        anchor_div = cell.find('div', class_='anchor')
        # Only cells whose anchor div actually contains a link count.
        if anchor_div and anchor_div.find('a'):
            products.add(anchor_div.find('a')['href'])
更新
我的部分问题在于:第一次点击其实是在重新加载当前已经显示的第一页,却期望被等待的元素发生变化。即便把 click 和获取 soup 的那两行移到 for 循环的末尾,页面有时也要很久才会发生变化。
我没有使用 WebDriverWait,而是创建了一个函数来等待它加载。现在似乎可以工作,但我不禁觉得它不稳定并且并不总是有效。
def click(self, element_xpath, wait_xpath=None, sleep_time=0):
    """Click *element_xpath*; when *wait_xpath* is given, wait until
    that element changes before returning.

    Returns True when the click (and the optional change-wait)
    succeeded, False when the watched element never changed within
    element_changed's timeout.
    """
    # Snapshot the watched element's text before clicking so a change
    # can be detected afterwards.
    old_text = None
    if wait_xpath:
        old_text = self.driver.find_element_by_xpath(wait_xpath).text
    self.driver.find_element_by_xpath(element_xpath).click()
    # Single combined guard replaces the duplicated `if wait_xpath`.
    if wait_xpath and not element_changed(self.driver, wait_xpath, old_text):
        # Logger.warn() is a deprecated alias; use warning().
        log.warning('click did not change element at %s', wait_xpath)
        return False
    time.sleep(sleep_time)
    return True
def element_changed(driver, element_xpath, old_element_text, timeout_seconds=10):
    """Return True once the text at *element_xpath* differs from
    *old_element_text* (a stale node also counts as changed); return
    False if no change is observed within *timeout_seconds* seconds.
    A missing element is treated as unchanged and polling continues.
    """
    interval = 1
    start = time.time()
    while (time.time() - start) < timeout_seconds:
        try:
            # .text is kept inside the try: the node can go stale
            # between lookup and text access.
            if driver.find_element_by_xpath(element_xpath).text != old_element_text:
                return True
        except exceptions.StaleElementReferenceException:
            # Node replaced in the DOM -> the page changed.
            return True
        except exceptions.NoSuchElementException:
            # Not present yet -- retry after the pause.
            pass
        time.sleep(interval)
    return False
运行代码是这样的:
# Scrape the product links from every page of the AVR parametric table.
driver = Webdriver()
url = 'http://www.atmel.com/products/microcontrollers/avr/default.aspx?tab=parameters'
wait_xpath = '//*[@id="device-columns"]/tbody/tr[2]/td[1]/div[2]/a'

driver.load(url, wait_xpath)
soup = driver.get_soup()

# One pagination <a> per result page.
pages = soup('ul', class_='pagination')[0]('a')
num_pages = len(pages)

products = set()
for page_no in range(num_pages):
    element_xpath = '//*[@id="top-nav"]/div/ul/li[%d]/a' % (2 + page_no)
    # The first click re-selects the page already on screen, so the
    # watched element will not change -- skip the change-wait for it.
    watch = None if page_no == 0 else wait_xpath
    driver.click(element_xpath, watch, 1)
    soup = driver.get_soup()
    for cell in soup('td', class_='first-cell'):
        holder = cell.find('div', class_='anchor')
        link = holder.find('a') if holder else None
        if link:
            products.add(link['href'])
我是一名优秀的程序员,十分优秀!