gpt4 book ai didi

python - 从Google Play商店应用网站中提取评论

转载 作者:行者123 更新时间:2023-12-02 09:42:37 26 4
gpt4 key购买 nike

import time
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
import pandas as pd

class FindByXpathCss():
# Declaring variables
Reviews = [] # List to store final set of reviews
reviewText = [] # List to store reviews extracted from XPath
reviewFullText = []

# Chromedriver path
driver = webdriver.Chrome(executable_path=r"F:\Chrome-webdriver\chromedriver.exe")
driver.maximize_window()

baseUrl = "https://play.google.com/store/apps/details?id=com.delta.mobile.android&hl=en_US&showAllReviews=true"

driver.get(baseUrl)
# driver.execute_script("scrollBy(0,300);")
# Scrolling down
for i in range(20):
driver.find_element_by_xpath('//*[@id="yDmH0d"]').send_keys(Keys.ARROW_DOWN, i)
time.sleep(0.5)

# To click on Show more button
#btnShowMore = driver.find_element_by_xpath('//*[@id="fcxH9b"]/div[4]/c-wiz/div/div[2]''/div/div[1]/div/div/div[1]/div[2]/div[2]/div/span/span').click()
# Scrolling to top
for j in range(10):
driver.find_element_by_xpath('//*[@id="yDmH0d"]').send_keys(Keys.ARROW_UP, j)

#for i in range(10):
review_btn = driver.find_elements_by_xpath("//button[contains(@class,'')][contains(text(),'Full Review')]")
single_review_btn = driver.find_element_by_xpath("//button[contains(@class,'')][contains(text(),'Full Review')]")
#time.sleep(1)

带有2个标签的div html标签,其中一个的jsname为“fbQN7e”,该标签用于保存较大的评论,并且那些评论将具有称为“完整评论”的按钮。同一div html标签中的另一个跨度是“bN97Pc”,该位置用于容纳较小的评论,而该评论的末尾将没有“完整评论”按钮。我无法获得两种类型的跨度的评论。在这里,我尝试将reviewFullText列表直接写入数据框,但仅获取元素数据类型,而不获取文本。我不知道为什么会这样。
    for btn in review_btn:
btn.click()
reviewFullText = driver.find_elements_by_css_selector("span[jsname='fbQN7e']")

#if(single_review_btn.is_enabled()==False):
#reviewText = driver.find_elements_by_css_selector("span[jsname=\"bN97Pc\"]")
##else:
#pass

# Iterating each reviews and appending into list Reviews
for txtreview in reviewText:
reviewFullText.append(txtreview.text)

print(len(reviewFullText))

# Writing the list values into csv file
df = pd.DataFrame(reviewFullText)
#df = pd.DataFrame({'Reviews': 'Reviews'}) #'Sentiment': 'null'})
df.to_csv('Reviews.csv', index=True, encoding='utf-8')

driver.close()

最佳答案

我已修改您的解决方案以从页面中检索所有评论。

import time
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
class FindByXpathCss():
driver = webdriver.Chrome(executable_path=r"C:\New folder\chromedriver.exe")
driver.maximize_window()
baseUrl = "https://play.google.com/store/apps/details?id=com.delta.mobile.android&hl=en_US&showAllReviews=true"
driver.get(baseUrl)

scrolls = 3
while True:
scrolls -= 1
driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
time.sleep(3)
if scrolls < 0:
break

buttonClick = WebDriverWait(driver, 30).until(
EC.visibility_of_all_elements_located((By.XPATH, "//button[contains(@class,'')][contains(text(),'Full Review')]")))
for element in buttonClick:
driver.execute_script("arguments[0].click();", element)

reviewText = WebDriverWait(driver, 30).until(
EC.presence_of_all_elements_located((By.XPATH, "//*[@class='UD7Dzf']")))


for textreview in reviewText:
print textreview.text

reviewText = WebDriverWait(driver, 30).until(
EC.presence_of_all_elements_located((By.XPATH, "//*[@class='UD7Dzf']")))

# reviewText = driver.find_elements_by_xpath("//*[@class='UD7Dzf']")
for textreview in reviewText:
print textreview.text

输出:

enter image description here

关于python - 从Google Play商店应用网站中提取评论,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/61101495/

26 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com