gpt4 book ai didi

python - 使用 BeautifulSoup 在 Python 中抓取缺货通知程序

转载 作者:行者123 更新时间:2023-11-30 22:10:27 29 4
gpt4 key购买 nike

我正在尝试构建一个通知程序,当缺货商品重新有货时,它会向我发送电子邮件。到目前为止,我已经弄清楚了如何检索商品名称和价格。然而,当我尝试把“在商店中查找”旁边的蓝色“缺货”按钮用作标签时,不知为何检索到的却是“添加到购物车”,而该按钮只有在商品有库存时才会出现。于是我改用标题上方的“OUT OF STOCK”文本来将 oos_status 设置为 True。由于网页上的 i7 配置目前缺货(撰写本文时),oos_status 应该为 True,但结果仍然是 False,即表示有库存。如有改进建议,我将不胜感激。这是代码:

from urllib.request import Request, urlopen
from bs4 import BeautifulSoup as soup

# Product page that is polled for the item's name, price and stock badge.
url = ('https://www.microsoft.com/en-ca/p/huawei-matebook-x-pro-laptop/'
       '8n4k86d4j006/4X0P?activetab=pivot%3aoverviewtab')

req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
# The context manager closes the HTTP response even if read() raises.
with urlopen(req) as uClient:
    page_html = uClient.read()

# html parsing
page_soup = soup(page_html, 'html.parser')

# grabs OOS container
container_oos = page_soup.findAll(
    'div', {"class": "cli_badge context-buy-box-badge"})

# grabs price container
container_price_disclaimer = page_soup.findAll(
    "div", {'class': "price-disclaimer"})

# grabs name container
container_name = page_soup.findAll(
    "div",
    {"class": "m-product-detail-hero-product-placement oneui-override"})

# finds text of name, price and out of stock status
name = container_name[0].findAll('h1', {'id': 'page-title'})[0].text.strip()
price = container_price_disclaimer[0].findAll('span')[0].text.strip()

# Using 'OUT OF STOCK' text above title to decide whether out of stock.
# find() returns a single Tag (or None), not a list, so the badge must not be
# indexed with [0]. A missing badge container is treated as "not out of stock"
# instead of raising IndexError.
# NOTE(review): if the badge is injected client-side after page load it will
# not be present in this static HTML and oos_status stays False -- confirm
# against the page's network requests.
oos_badge = None
if container_oos:
    oos_badge = container_oos[0].find('span', {'id': 'out-of-stock-badge'})

oos_status = (oos_badge is not None
              and oos_badge.text.strip() == 'OUT OF STOCK')

最佳答案

页面通过 AJAX 调用从外部站点加载数据。如果您查看 Firefox/Chrome 网络检查器,您将看到调用的去向。

此示例将获取有关页面上找到的所有 SKU 的信息:

from bs4 import BeautifulSoup
import requests
import json
from pprint import pprint

# Product page; its markup carries the availability endpoint and the
# per-configuration identifiers needed to query it.
url = 'https://www.microsoft.com/en-ca/p/huawei-matebook-x-pro-laptop/8n4k86d4j006/4X0P?activetab=pivot:overviewtab'

headers = {'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:61.0) Gecko/20100101 Firefox/61.0'}
soup = BeautifulSoup(requests.get(url, headers=headers).text, 'lxml')

# From here on `url` is the availability endpoint advertised by the page.
url = soup.select_one('[data-availability-url]')['data-availability-url']

# The product id is the same for every SKU, so look it up once rather than
# re-scanning the document on each loop iteration.
product_id = soup.select_one('[data-product-id]')['data-product-id']

# Build one request entry per SKU. The three selections are paired
# positionally by zip(), which stops at the shortest of them.
data = []
for data_availability_id, data_inventory_sku_id, data_sku in zip(
        soup.select('div.context-configuration-buttons [data-availability-id]'),
        soup.select('div.context-configuration-buttons [data-inventory-sku-id]'),
        soup.select('div#TechSpec [data-sku]')):
    data.append({
        'availabilityId': data_availability_id['data-availability-id'],
        # NOTE(review): hard-coded distributor id -- confirm against the
        # request the page itself sends.
        'distributorId': 9000000013,
        'inventorySkuId': data_inventory_sku_id['data-inventory-sku-id'],
        'preorder': False,
        'productId': product_id,
        'skuId': data_sku['data-sku'],
    })

# POST the SKU list as a JSON body and pretty-print the decoded response.
r = requests.post(url, headers={'Content-Type': 'application/json'}, data=json.dumps(data))
pprint(json.loads(r.text))

这将打印:

{'availabilities': [{'Allocations': {},
'availabilityId': '8W2321TK7D0Q',
'availableLots': {'0001-01-01T00:00:00.0000000Z': {'9000000013': {'deliverByDates': {'0001': '2018-08-09T12:00:00.0000000Z',
'0004': '2018-08-16T12:00:00.0000000Z'},
'deliveryType': 'Ship',
'hasArbitraryLimitPolicy': 'False',
'inStock': 'False',
'isUnknownDate': 'False',
'onlineOrderAvailable': 'True',
'render': 'True',
'showDateOverride': 'False'}}},
'catalogSkuId': 'HB3R',
'distributorSkuId': 'QF9-01635',
'futureLots': {'2018-08-07T04:00:00.0000000Z': {'9000000013': {'deliverByDates': {'0001': '2018-08-12T12:00:00.0000000Z',
'0004': '2018-08-19T12:00:00.0000000Z'},
'deliveryType': 'Ship',
'doNotFulfillBeforeDate': 'True',
'hasManuallyConfiguredDeliveryDate': 'False',
'inStock': 'True',
'isProductLaunchFutureLot': 'True',
'isUnknownDate': 'False',
'onlineOrderAvailable': 'True',
'render': 'True',
'showDateOverride': 'False',
'warehouseAllocation': {}}}},
'inventoryControlSkuId': 'QF9-01635',
'lastWarehouseUpdateTime': '2018-08-04T06:46:49.0000000Z',
'productId': '8N4K86D4J006'},
{'Allocations': {},
'availabilityId': '8WDNCCR153LS',
'availableLots': {'0001-01-01T00:00:00.0000000Z': {'9000000013': {'deliverByDates': {'0001': '2018-08-09T12:00:00.0000000Z',
'0004': '2018-08-16T12:00:00.0000000Z'},
'deliveryType': 'Ship',
'hasArbitraryLimitPolicy': 'False',
'inStock': 'False',
'isUnknownDate': 'False',
'onlineOrderAvailable': 'False',
'render': 'True',
'showDateOverride': 'False'}}},
'catalogSkuId': '4X0P',
'distributorSkuId': 'QF9-01638',
'futureLots': {'2018-08-07T04:00:00.0000000Z': {'9000000013': {'deliverByDates': {'0001': '2018-08-12T12:00:00.0000000Z',
'0004': '2018-08-19T12:00:00.0000000Z'},
'deliveryType': 'Ship',
'doNotFulfillBeforeDate': 'True',
'hasManuallyConfiguredDeliveryDate': 'False',
'inStock': 'False',
'isProductLaunchFutureLot': 'True',
'isUnknownDate': 'False',
'onlineOrderAvailable': 'False',
'render': 'True',
'showDateOverride': 'False',
'warehouseAllocation': {}}},
'2018-09-04T04:00:00.0000000Z': {'9000000013': {'deliverByDates': {'0001': '2018-09-09T12:00:00.0000000Z',
'0004': '2018-09-16T12:00:00.0000000Z'},
'deliveryType': 'Ship',
'doNotFulfillBeforeDate': 'True',
'hasManuallyConfiguredDeliveryDate': 'False',
'inStock': 'False',
'isProductLaunchFutureLot': 'False',
'isUnknownDate': 'False',
'onlineOrderAvailable': 'False',
'render': 'True',
'showDateOverride': 'False',
'warehouseAllocation': {}}}},
'inventoryControlSkuId': 'QF9-01638',
'lastWarehouseUpdateTime': '2018-08-04T06:46:49.0000000Z',
'productId': '8N4K86D4J006'}],
'inStock': 'False'}

如您所见,有些 SKU(配置)有库存,有些则没有。这取决于您要搜索哪个配置,该页面 URL 上有多个配置。

关于python - 使用 BeautifulSoup 在 Python 中抓取缺货通知程序,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/51681873/

29 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com