
python - Need help simulating an XHR request


I need to scrape a website that has a "load more" button. Here is the spider code I wrote in Python:

import scrapy
import json
import requests
import re
from parsel import Selector
from scrapy.selector import Selector
from scrapy.http import HtmlResponse

headers = {
    'origin': 'https://www.tayara.tn',
    'accept-encoding': 'gzip, deflate, br',
    'accept-language': 'en-US,en;q=0.9',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
    'content-type': 'application/json',
    'accept': '*/*',
    'referer': 'https://www.tayara.tn/sc/immobilier/bureaux-et-plateaux',
    'authority': 'www.tayara.tn',
    'dnt': '1',
}

data = '{"query":"query ListingsPage($page: Page, $filter: SearchFilter, $sortBy: SortOrder) {\\n listings: searchAds(page: $page, filter: $filter, sortBy: $sortBy) {\\n items {\\n uuid\\n title\\n price\\n currency\\n thumbnail\\n createdAt\\n category {\\n id\\n name\\n engName\\n __typename\\n }\\n user {\\n uuid\\n displayName\\n avatar(width: 96, height: 96) {\\n url\\n __typename\\n }\\n __typename\\n }\\n __typename\\n }\\n trackingInfo {\\n transactionId\\n listName\\n recommenderId\\n experimentId\\n variantId\\n __typename\\n }\\n totalCount\\n pageInfo {\\n startCursor\\n hasPreviousPage\\n endCursor\\n hasNextPage\\n __typename\\n }\\n __typename\\n }\\n}\\n","variables":{"page":{"count":36,"offset":"cDEwbg==.MjAxOC0xMi0wMlQxMzo1MDoxMlo=.MzY="},"filter":{"queryString":null,"category":"140","regionId":null,"attributeFilters":[]},"sortBy":"CREATED_DESC"},"operationName":"ListingsPage"}'


class Tun(scrapy.Spider):

    name = "tayaracommercial"
    start_urls = [
        'https://www.tayara.tn/sc/immobilier/bureaux-et-plateaux'
    ]

    def parse(self, response):
        yield Request('https://www.tayara.tn/graphql', method='post', headers=headers, body=data, self.parse_item)

    def parse_item(self, response):
        source = 'Tayara'
        reference = response.url.split('//')[1].split('/')[3]
        titre = response.xpath('//h1[@data-name="adview_title"]/text()').extract()

        yield {'Source': source, 'Reference': reference, 'Titre': titre}

This is my rough attempt. I know it is wrong. Can you point out how to fix it?

Best answer

You can scrape the data using the following example:

# Importing the dependencies
# This is needed to create an lxml object that uses the css selector
from lxml.etree import fromstring

# The requests library
import requests


class WholeFoodsScraper:

    API_url = 'http://www.wholefoodsmarket.com/views/ajax'
    scraped_stores = []

    def get_stores_info(self, page):

        # This is the only data required by the api
        # to send back the stores info
        data = {
            'view_name': 'store_locations_by_state',
            'view_display_id': 'state',
            'page': page
        }
        # Making the post request
        response = requests.post(self.API_url, data=data)

        # The data we are looking for is in the second
        # element of the response and has the key 'data',
        # so that is what's returned
        return response.json()[1]['data']
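
The same idea carries over to the Tayara spider from the question: replay the POST request that the "load more" button fires against the GraphQL endpoint. Below is a minimal, untested sketch that reuses the headers and data variables defined in the question. The main corrections are passing the callback as a keyword argument (in the original yield Request(..., self.parse_item) a positional argument follows keyword arguments, which is a syntax error) and reading the listing fields out of the JSON response, since the GraphQL endpoint returns JSON rather than an ad page. The response shape (data -> listings -> items) is an assumption based on the aliases in the GraphQL query itself.

import json

import scrapy
from scrapy import Request


class Tun(scrapy.Spider):
    # Sketch of a corrected spider; 'headers' and 'data' are the
    # module-level values defined in the question above.
    name = "tayaracommercial"
    start_urls = ['https://www.tayara.tn/sc/immobilier/bureaux-et-plateaux']

    def parse(self, response):
        # Replay the XHR that the "load more" button sends:
        # a POST to the GraphQL endpoint with the JSON payload as the body.
        # The callback has to be passed as a keyword argument.
        yield Request(
            'https://www.tayara.tn/graphql',
            method='POST',
            headers=headers,
            body=data,
            callback=self.parse_item,
        )

    def parse_item(self, response):
        # The endpoint answers with JSON, so parse the body instead of
        # using XPath. The path data -> listings -> items follows the
        # aliases used in the GraphQL query above (assumption).
        payload = json.loads(response.text)
        for item in payload['data']['listings']['items']:
            yield {
                'Source': 'Tayara',
                'Reference': item['uuid'],
                'Titre': item['title'],
            }

To page through results, the offset field inside the variables part of the payload presumably takes the endCursor value returned in pageInfo of the previous response, which is how the "load more" button fetches the next batch.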

Regarding python - Need help simulating an XHR request, a similar question was found on Stack Overflow: https://stackoverflow.com/questions/54171736/
