gpt4 book ai didi

python - 单击 "show more"按钮直到显示表中的所有数据并从表中获取所有数据

转载 作者:太空宇宙 更新时间:2023-11-03 14:37:35 24 4
gpt4 key购买 nike

我需要从该页面的表中获取所有数据 https://www.nets.eu/dk-da/l%C3%B8sninger/Registreringsnumre

但我需要单击“显示更多”按钮,直到显示所有数据..

但是无论我点击“显示更多”按钮多少次,表格始终只有 30 行!?

import sys
import time
from pyvirtualdisplay import Display
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
import json

class Scrape:
    """Drive a headless Firefox session against the nets.eu table page.

    Constructing an instance performs the whole run: it starts a virtual
    display and a Firefox driver, calls ``load_page()``, sleeps for five
    seconds and then shuts everything down via ``close()``.
    """

    # Class-level placeholders; both are rebound per instance in __init__.
    display = None
    driver = None

    def __init__(self):
        # Virtual display so Firefox can run without a visible screen.
        self.display = Display(visible=0, size=(800, 600))
        self.display.start()

        self.driver = webdriver.Firefox()
        # A shared 5-second explicit wait, stored as an attribute of the driver.
        self.driver.wait = WebDriverWait(self.driver, 5)

        self.load_page()
        time.sleep(5)
        self.close()

    def load_page(self):
        """Open the page, click the "next batch" element five times, read rows.

        The cell texts are gathered into a local list that is neither
        returned nor stored. Any ``TimeoutException`` raised while waiting
        for the table or for the button is routed to ``error()``, which
        terminates the process.
        """
        url = 'https://www.nets.eu/dk-da/l%C3%B8sninger/Registreringsnumre'
        xpath = '//table[@class="itera-DataTable"]/tbody/tr'
        data = []

        self.driver.get(url)

        try:
            self.driver.wait.until(
                EC.presence_of_element_located(
                    (By.CLASS_NAME, 'itera-DataTable')))
            print('Table found!')

            # Exactly five click attempts, numbered 1..5.
            for attempt in range(1, 6):
                button = self.driver.wait.until(
                    EC.presence_of_element_located(
                        (By.CLASS_NAME, 'itera-nextbatchbox')))
                print('Button %d found!' % attempt)

                # Row count is reported *before* each click.
                print(len(self.driver.find_elements_by_xpath(xpath)))

                button.click()

            rows = self.driver.find_elements_by_xpath(xpath)
            for index, row in enumerate(rows, 1):
                print('TR %d' % index)
                cells = row.find_elements_by_tag_name('td')
                # A row with cells contributes a one-element list holding the
                # list of cell texts; a row without cells contributes [].
                data.append([[cell.text for cell in cells]] if cells else [])

            #print json.dumps(data)

        except TimeoutException:
            # Also reached when the button wait times out, not only the table.
            self.error('Table not found')

    def error(self, str):
        """Shut down, write the message to stderr and exit with status 1."""
        self.close()

        sys.stderr.write('%s\n' % (str,))
        sys.exit(1)

    def close(self):
        """Quit the driver if one exists, then stop the virtual display."""
        driver = self.driver
        if driver is not None:
            driver.quit()
        self.display.stop()


if __name__ == '__main__':
    Scrape()

最佳答案

看起来根本原因是 Selenium 原生的 click() 没有真正触发按钮。改为通过 JavaScript 点击按钮(在 Python 中使用 driver.execute_script,相当于 Java 中的 JavascriptExecutor)就可以解决这个问题。请参阅下面我的代码。

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
import time

# Load the page, keep clicking the "next batch" button through JavaScript
# until the table stops growing, then collect every row's cell texts.
driver = webdriver.Chrome()
data = []
url = 'https://www.nets.eu/dk-da/l%C3%B8sninger/Registreringsnumre'
xpath = '//table[@class="itera-DataTable"]/tbody/tr'

try:
    driver.get(url)

    try:
        table = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CLASS_NAME, 'itera-DataTable')))
    except TimeoutException:
        print('Table not found')
        # Nothing to scrape without the table; the finally clause below
        # still closes the browser.
        raise SystemExit(1)
    print('Table found!')

    # find_elements(By.XPATH, ...) is used instead of find_elements_by_xpath,
    # which no longer exists in Selenium 4.
    row_count = len(driver.find_elements(By.XPATH, xpath))
    print(row_count)

    while True:
        try:
            button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable(
                    (By.CSS_SELECTOR, '.itera-nextbatchbox')))
        except TimeoutException:
            # Button is gone or no longer clickable: everything is loaded.
            break
        print('Button found!')

        # Click through JavaScript rather than WebElement.click().
        driver.execute_script("arguments[0].click();", button)

        try:
            # Wait until the click has actually added rows to the table.
            WebDriverWait(driver, 10).until(
                lambda d: len(d.find_elements(By.XPATH, xpath)) > row_count)
        except TimeoutException:
            print('No more rows. Rows count: '
                  + str(len(driver.find_elements(By.XPATH, xpath))))
            # Stop here; otherwise a button that stays clickable would keep
            # this loop running forever.
            break

        # Short pause kept from the original before re-counting; presumably
        # lets the rest of the batch finish rendering.
        time.sleep(1)
        row_count = len(driver.find_elements(By.XPATH, xpath))
        print(row_count)

    # Collect the cell texts of every row that has <td> cells.
    for row in driver.find_elements(By.XPATH, xpath):
        cells = [cell.text for cell in row.find_elements(By.TAG_NAME, 'td')]
        if cells:
            data.append(cells)
finally:
    # Always release the browser, including on early exit or error.
    driver.quit()

关于python - 单击 "show more"按钮直到显示表中的所有数据并从表中获取所有数据,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/46820353/

24 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com