gpt4 book ai didi

python - Selenium Python 脚本在 Windows 和 Ubuntu 环境中有不同的行为

转载 作者:太空宇宙 更新时间:2023-11-03 16:53:10 25 4
gpt4 key购买 nike

我试过在 Windows 和 Ubuntu 上运行脚本,两者都使用 Python 3 和最新版本的 geckodriver,导致不同的行为。完整的脚本如下。

我正在尝试从备考网站获取多个不同测试的数据。有不同的科目,每个科目都有专业,每个科目都有练习测试,每个科目都有几个问题。 scrape 函数遍历了获取每种类型数据的步骤。

subject <--- specialization <---- practice-test *------ question

get_questions 函数是不同之处:

  • 在 Windows 中,它的行为符合预期。单击最后一个问题的选择后,将转到结果页面。
  • 在 Ubuntu 中,当在最后一个问题上单击一个选项时,它会重新加载最后一个问题并不断单击相同的选项并重新加载相同的问题。

    from selenium import webdriver
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC
    import pathlib
    import time
    import json
    import os

    driver=webdriver.Firefox(executable_path="./geckodriver.exe")
    wait = WebDriverWait(driver, 15)
    data=[]

    def setup():

    driver.get('https://www.varsitytutors.com/practice-tests')
    try:
    go_away_1= driver.find_element_by_class_name("ub-emb-iframe")
    driver.execute_script("arguments[0].style.visibility='hidden'", go_away_1)
    go_away_2= driver.find_element_by_class_name("ub-emb-iframe-wrapper")
    driver.execute_script("arguments[0].style.visibility='hidden'", go_away_2)
    go_away_3= driver.find_element_by_class_name("ub-emb-visible")
    driver.execute_script("arguments[0].style.visibility='hidden'", go_away_3)
    except:
    pass

    def get_subjects(subs=[]):
    subject_clickables_xpath="/html/body/div[3]/div[9]/div/*/div[@data-subject]/div[1]"
    subject_clickables=driver.find_elements_by_xpath(subject_clickables_xpath)
    subject_names=map(lambda x : x.find_element_by_xpath('..').get_attribute('data-subject'), subject_clickables)
    subject_pairs=zip(subject_names, subject_clickables)
    return subject_pairs

    def get_specializations(subject):

    specialization_clickables_xpath="//div//div[@data-subject='"+subject+"']/following-sibling::div//div[@class='public_problem_set']//a[contains(.,'Practice Tests')]"
    specialization_names_xpath="//div//div[@data-subject='"+subject+"']/following-sibling::div//div[@class='public_problem_set']//a[contains(.,'Practice Tests')]/../.."
    specialization_names=map(lambda x : x.get_attribute('data-subject'), driver.find_elements_by_xpath(specialization_names_xpath))
    specialization_clickables = driver.find_elements_by_xpath(specialization_clickables_xpath)
    specialization_pairs=zip(specialization_names, specialization_clickables)
    return specialization_pairs

    def get_practices(subject, specialization):
    practice_clickables_xpath="/html/body/div[3]/div[8]/div[3]/*/div[1]/a[1]"
    practice_names_xpath="//*/h3[@class='subject_header']"
    lengths_xpath="/html/body/div[3]/div[8]/div[3]/*/div[2]"
    lengths=map(lambda x : x.text, driver.find_elements_by_xpath(lengths_xpath))
    print(lengths)
    practice_names=map(lambda x : x.text, driver.find_elements_by_xpath(practice_names_xpath))
    practice_clickables = driver.find_elements_by_xpath(practice_clickables_xpath)
    practice_pairs=zip(practice_names, practice_clickables)
    return practice_pairs

    def remove_popup():
    try:

    button=wait.until(EC.element_to_be_clickable((By.XPATH,"//button[contains(.,'No Thanks')]")))
    button.location_once_scrolled_into_view
    button.click()
    except:
    print('could not find the popup')

    def get_questions(subject, specialization, practice):
    remove_popup()
    questions=[]
    current_question=None
    while True:
    question={}
    try:
    WebDriverWait(driver,5).until(EC.presence_of_element_located((By.XPATH,"/html/body/div[3]/div[7]/div[1]/div[2]/div[2]/table/tbody/tr/td[1]")))
    question_number=driver.find_element_by_xpath('/html/body/div[3]/div[7]/div[1]/div[2]/div[2]/table/tbody/tr/td[1]').text.replace('.','')
    question_pre=driver.find_element_by_class_name('question_pre')
    question_body=driver.find_element_by_xpath('/html/body/div[3]/div[7]/div[1]/div[2]/div[2]/table/tbody/tr/td[2]/p')
    answer_choices=driver.find_elements_by_class_name('question_row')
    answers=map(lambda x : x.text, answer_choices)
    question['id']=question_number
    question['pre']=question_pre.text
    question['body']=question_body.text
    question['answers']=list(answers)
    questions.append(question)
    choice=WebDriverWait(driver,20).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR,"input.test_button")))
    driver.execute_script("arguments[0].click();", choice[3])
    time.sleep(3)
    except Exception as e:
    if 'results' in driver.current_url:
    driver.get(driver.current_url.replace('http://', 'https://'))
    # last question has been answered; record results
    remove_popup()
    pathlib.Path('data/'+subject+'/'+specialization).mkdir(parents=True, exist_ok=True)
    with open('data/'+subject+'/'+specialization+'/questions.json', 'w') as outfile:
    json.dump(list(questions), outfile)
    break
    else:
    driver.get(driver.current_url.replace('http://', 'https://'))
    return questions


    def scrape():
    setup()
    subjects=get_subjects()
    for subject_name, subject_clickable in subjects:
    subject={}
    subject['name']=subject_name
    subject['specializations']=[]
    subject_clickable.click()
    subject_url=driver.current_url.replace('http://', 'https://')
    specializations=get_specializations(subject_name)
    for specialization_name, specialization_clickable in specializations:
    specialization={}
    specialization['name']=specialization_name
    specialization['practices']=[]
    specialization_clickable.click()
    specialization_url=driver.current_url.replace('http://', 'https://')
    practices=get_practices(subject_name, specialization_name)
    for practice_name, practice_clickable in practices:
    practice={}
    practice['name']=practice_name
    practice_clickable.click()
    questions=get_questions(subject_name, specialization_name, practice_name)
    practice['questions']=questions
    driver.get(specialization_url)
    driver.get(subject_url)
    data.append(subject)
    print(data)
    scrape()

谁能帮我弄清楚是什么原因造成的?

最佳答案

这只是时机。在加载下一页之前,最后一个问题将比 3 秒 sleep 时间长得多。等待页面消失可以解决此问题并加快脚本执行速度。

  from selenium.common.exceptions import StaleElementReferenceException
<snip>
choice=WebDriverWait(driver,20).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR,"input.test_button")))
choice[3].click()
try:
while choice[3].is_displayed():
time.sleep(1)
except StaleElementReferenceException as e:
continue

关于python - Selenium Python 脚本在 Windows 和 Ubuntu 环境中有不同的行为,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/59077712/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com