gpt4 book ai didi

python - 如何从交互式代码中抓取数据

转载 作者:行者123 更新时间:2023-12-04 07:18:10 25 4
gpt4 key购买 nike

我想从一个旅游网站(site)抓取数据。
网站上有一个酒店列表,我已经能提取酒店名称和安排,但卡在了提取每个安排的价格这一步:价格是交互式的,只有在选择了某个安排之后才会显示出来。
下面附上我的代码,如果有人能帮忙,先在此表示感谢。

#!/usr/bin/env python
# coding: utf-8
import json
from time import sleep
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait, Select


# Start Chrome through the local ChromeDriver executable.
# The path is a raw string: "\c" is not a valid escape sequence, and newer
# Python versions emit a warning for it. The resulting value is unchanged.
PATH = r"C:\chromedriver.exe"
driver = webdriver.Chrome(PATH)

# Load the booking site's home page.
driver.get('https://tn.tunisiebooking.com/')
# Explicit-wait helper (20 s timeout); not used by the code visible here.
wait = WebDriverWait(driver, 20)

# Search criteria fed into the home-page search form below.
params = {
    'destination': 'Tunis',
    'date_from': '08/08/2021',
    'date_to': '09/08/2021',
    'bedroom': '1',
}

# Choose the destination in the <select id="ville_des"> drop-down.
destination_select = Select(driver.find_element_by_id('ville_des'))
destination_select.select_by_value(params['destination'])

# Choose the number of rooms in the <select id="select_ch"> drop-down.
bedroom_select = Select(driver.find_element_by_id('select_ch'))
bedroom_select.select_by_value(params['bedroom'])

# Set both date inputs directly through JavaScript.
# NOTE(review): 'depart' receives date_from and 'checkin' receives date_to;
# confirm against the page that these ids map to the intended fields.
script = f"document.getElementById('depart').value ='{params['date_from']}';"
script += f"document.getElementById('checkin').value ='{params['date_to']}';"
driver.execute_script(script)

# Click the search button, then pause 10 s before the results are read.
btn_rechercher = driver.find_element_by_id('boutonr')
btn_rechercher.click()
sleep(10)

# Optional step (disabled): click the "details" button.
#btn_plus = driver.find_element_by_id('plus_res')
#btn_plus.click()
#sleep(10)

# ----------------------------------------------------------------------------
# Build one record per hotel card found on the results page.
hotels_list = []
hotels_objects = driver.find_elements_by_xpath(
    '//div[contains(@class, "enveloppe_produit")]'
)
for hotel_obj in hotels_objects:
    # Displayed price: text of the first <div> inside the "monaieprix"
    # block, with newlines removed.
    price_object = hotel_obj.find_element_by_xpath(
        './/div[@class="monaieprix"]'
    )
    price_value = price_object.find_element_by_xpath(
        './/div[1]'
    ).text.replace('\n', '')

    # Element that wraps the hotel title link.
    title_data = hotel_obj.find_element_by_xpath(
        './/span[contains(@class, "tittre_hotel")]'
    )

    # Arrangement labels: the <u> elements under any "angle" <div>.
    arrangements_obj = hotel_obj.find_elements_by_xpath(
        './/div[contains(@class, "angle")]//u'
    )
    arrangements = [ao.text for ao in arrangements_obj]

    # Text of every element in this card whose id contains "prixtotal".
    # These are read without any click, so they hold whatever the page
    # currently shows for this card.
    prixM_obj = hotel_obj.find_elements_by_xpath(
        './/div[contains(@id, "prixtotal")]'
    )
    prixM = [po.text for po in prixM_obj]

    # Store the record; every XPath above is relative ('.//') to this card.
    hotels_list.append({
        'name': title_data.find_element_by_xpath('.//a//h3').text,
        'arrangements': arrangements,
        'prixM': prixM,
        'price': price_value,
    })

# ----------------------------------------------------------------
# Debug aid (disabled): dump every hotel record as indented JSON.
#for hotel in hotels_list:
#    print(json.dumps(hotel, indent=4))

import pandas as pd

# Tabulate the scraped records, keeping only the listed fields in this order.
# NOTE(review): each record also carries a 'prixM' key that is not selected
# here; confirm whether it should be a column as well.
df = pd.DataFrame(
    data=hotels_list,
    columns=['name', 'arrangements', 'price'],
)
# Preview of the first rows; the value is discarded unless run interactively.
df.head()


最佳答案

要获得所有安排选项的价格,需要执行点击操作。
下面的代码检索第一个选项(如早餐)的安排及其价格。需要对所有其他可用选项重复相同的过程。

# Map each hotel name to a list of (room label text, displayed price) pairs.
# Each hotel card is a <div> whose id starts with "produit_affair_".
hotels = driver.find_elements_by_xpath("//div[starts-with(@id,'produit_affair_')]")
hoteldata = {}
for hotel in hotels:
    name = hotel.find_element_by_tag_name("h3").text
    # Arrangement option elements (<u>). Not used below: only the currently
    # selected arrangement is read. Iterate these to cover the other options.
    arr = hotel.find_elements_by_tag_name("u")
    rooms = hotel.find_elements_by_tag_name("label")
    roomdata = []
    for room in rooms:
        # Selecting the room is what makes its price appear on the page.
        room.click()
        # The leading '.' scopes the search to this hotel card. A bare '//'
        # is evaluated from the document root even when called on an element,
        # so it would return the first price element on the page every time.
        # NOTE(review): assumes the "prixtotal_" <div> sits inside the
        # "produit_affair_" card; verify against the page markup.
        rprize = hotel.find_element_by_xpath(".//div[starts-with(@id,'prixtotal_')]").text
        roomdata.append((room.text, rprize))
    hoteldata[name] = roomdata
print(hoteldata)
输出如下:
{'KANTA': [('Chambre Double ', '43'), ('Chambre Double Vue Piscine ', '50')], 'El Mouradi Palace': [('Chambre Double ', '50'), ('Chambre Double superieure ', '50')], 'Occidental Sousse Marhaba': [('Double Standard ', '50'), ('Chambre Double Vue Mer. ', '50')], 'Tui Blue Scheherazade': [('Double Standard Vue Mer ', '50'), ('Double -Swim Up ', '50')], 'Golf Residence GAS': [('Double--Standard ', '50')], 'Sindbad Center GAS': [('Chambre Double ', '50')], 'Iberostar Diar el Andalous': [('Double Standard ', '50'), ('Double Standard Vue Mer ', '50'), ('Double Prestige ', '50'), ('Suite-Junior Double ', '50')], 'Seabel AlHambra Beach Golf & Spa': [('Bungalow Double ', '50'), ('Chambre Double superieure ', '50')], 'Marhaba Palace': [('Chambre Double ', '50')], 'Cosmos Tergui Club': [('Chambre Double ', '50'), ('Double_vue Mer ', '50')], 'Riadh Palms': [('Chambre Double-superieure ', '50'), ('Chambre Double Superieure Vue Mer ', '50')], 'Royal Jinene': [('Chambre Double ', '50'), ('Double Standard Vue Mer ', '50')], 'Houria Palace': [('Chambre-double-vue piscine  ', '50'), ('Chambre Double ', '50')], 'Marhaba Beach': [('Chambre Double ', '50')], 'Marhaba Club': [('Chambre Double ', '50'), ('Chambre Double Vue Mer ', '50')], 'Palmyra Aqua Park ex soviva': [('Chambre Double ', '50')], 'Sousse City & Beach Hotel': [('Double Standard ', '50'), ('Double Standard Vue Mer ', '50')], 'Sousse Pearl Marriott Resort & Spa': [('Chambre Double Standard ', '50'), ('Double Standard Vue Mer ', '50')], 'Riviera': [('Double Standard ', '50')], 'Concorde Green Park Palace': [('Double Standard ', '50'), ('Double Standard Vue Mer ', '50'), ('Suite Prestige Vue mer ', '50')]}

关于python - 如何从交互式代码中抓取数据,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/68662594/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com