gpt4 book ai didi

python - 在 Scrapy 中使用项目

转载 作者:行者123 更新时间:2023-12-01 08:11:31 24 4
gpt4 key购买 nike

我使用 Scrapy 并尝试输出一个 JSON 文件。不使用 Item 时一切正常,但我想改用 Item。

所以我的蜘蛛代码是:

import json
import scrapy
from scrapy.loader import ItemLoader
from ccv_spiders.items import AuctionItem

# Spider that reads seed records from file.json and requests each record's gm_url.
class GlenMarchSpider(scrapy.Spider):
name = 'auction_results'

# Load the seed records once, when the spider is constructed.
# NOTE(review): *args/**kwargs are accepted but not forwarded, and no call to the
# base-class __init__ is visible here — confirm whether one is needed.
def __init__(self, *args, **kwargs):
with open('file.json', encoding='utf-8') as data_file:
self.data = json.load(data_file)


# Issue one request per seed record, carrying the record along in request.meta.
def start_requests(self):
for item in self.data:
request = scrapy.Request(item['gm_url'], callback=self.parse)
request.meta['item'] = item
yield request

# Scrape every "div.car-item-border" block of the response for its seed record.
def parse(self, response):
# Recover the seed record attached in start_requests and reset its results list.
item = response.meta['item']
item['results'] = []

for caritem in response.css("div.car-item-border"):
loader= ItemLoader(item=AuctionItem(), selector=caritem, response=response)
# NOTE(review): the braces below build a set out of the add_css() return
# values, not a mapping of field names to data. The posted output shows
# "results": [[null]], which suggests every call returns None — confirm
# against the ItemLoader documentation.
item['results'].append({
loader.add_css("marque", "div.make::text"),
loader.add_css("model", "div.make::text"),
loader.add_css("model_year", "div.make::text"),
loader.add_css("price_str", "div.price::text"),
loader.add_css("auction_house", "div.auctionHouse::text"),
loader.add_css("auction_country", "div.auctionHouse::text"),
loader.add_css("auction_url", "div.view-auction a::attr(href)"),
loader.add_css("img", "img.img-responsive::attr(src)")
})
# The loaded item is yielded by itself, separately from item['results'];
# in the posted output it appears as its own top-level object.
yield loader.load_item()

# The seed record itself, including its 'results' list, is yielded here.
yield item

我的 items.py 代码是:

import scrapy


class AuctionItem(scrapy.Item):
    """Container for the fields scraped for a single auction listing."""

    marque = scrapy.Field()
    model = scrapy.Field()
    model_year = scrapy.Field()
    price_str = scrapy.Field()
    auction_house = scrapy.Field()
    auction_country = scrapy.Field()
    # Declared, but none of the add_css() calls shown on this page fill it.
    auction_date = scrapy.Field()
    auction_url = scrapy.Field()
    img = scrapy.Field()

我的 Json 输出:

{
"objectID": 10000,
"gm_url": "https://www.glenmarch.com/cars/results/quick/AC/10?unsold=1?limit=9999",
"results": [
[null]
]
},
{
"marque": [" 1913 AC 10 hp Light Car "],
"model": [" 1913 AC 10 hp Light Car "],
"model_year": [" 1913 AC 10 hp Light Car "],
"price_str": ["£24,150"],
"auction_house": [" Bonhams - The Beaulieu Sale, Beaulieu, UK "],
"auction_country": [" Bonhams - The Beaulieu Sale, Beaulieu, UK "],
"auction_url": ["http://www.bonhams.com/auctions/23594/lot/417/?category=list"],
"img": ["https://www.glenmarch.com/img/auctions/car/thumb/16-8-29-667feacd0f478172e463f237ba7ee692.jpg"]
}

我希望第二个对象出现在 results 数组里面,但我不知道该怎么做...

最佳答案

先把所有元素收集起来,最后只 yield 一次结果:

def parse(self, response):
    """Collect every car listing on the page into the seed record, then yield it once.

    The seed record is read from ``response.meta['item']``. One ``AuctionItem``
    is loaded per ``div.car-item-border`` element and appended to the record's
    ``'results'`` list.

    Args:
        response: The response for a request whose ``meta`` carries the seed
            record under the ``'item'`` key.

    Yields:
        The seed record, once, with its ``'results'`` list filled in.
    """
    item = response.meta['item']
    item['results'] = []

    for caritem in response.css("div.car-item-border"):
        # A fresh loader per listing, scoped to that listing's selector.
        loader = ItemLoader(item=AuctionItem(), selector=caritem, response=response)
        loader.add_css("marque", "div.make::text")
        loader.add_css("model", "div.make::text")
        loader.add_css("model_year", "div.make::text")
        loader.add_css("price_str", "div.price::text")
        loader.add_css("auction_house", "div.auctionHouse::text")
        loader.add_css("auction_country", "div.auctionHouse::text")
        loader.add_css("auction_url", "div.view-auction a::attr(href)")
        loader.add_css("img", "img.img-responsive::attr(src)")
        # Append the loaded item itself rather than yielding it, so each
        # listing ends up nested inside 'results'.
        item['results'].append(loader.load_item())

    # Yield only after the loop: one output object per seed record.
    yield item

关于python - 在 Scrapy 中使用项目,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/55223783/

24 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com