gpt4 book ai didi

html - 按类名的数据抓取元素

转载 作者:行者123 更新时间:2023-12-04 22:29:05 24 4
gpt4 key购买 nike

我正在尝试从网站中提取数据。我想把 '10' x 5' 这样的单元尺寸(类名是 "unit_size medium")复制到第 1 列,这一步可以成功;但我还想把促销信息(类名是 "promo_offers",例如“第一个月免费!”)复制到第 2 列。问题是此促销仅针对特定的单元,因此得到的数据具有误导性:促销被写进了第 1 个单元格,然后就出现错误。我只想为那些提供促销信息的单元复制促销,否则单元格应该为空白,或者设置为其他任意值。下面是代码...

请建议如何构建代码。

' Opens the storage-unit listing page in Internet Explorer and, for every
' element whose class is "unit_size medium", writes that element's text to
' column 1 and a "promo_offers" element's text to column 2 of the next free row.
' NOTE(review): the promo lookup indexes the page-wide "promo_offers"
' collection with the same counter as the unit collection, so it only works
' if both collections have the same length and order. When fewer promos than
' units exist, promos land next to the wrong units and the index eventually
' points past the end of the promo collection.
Sub GetClassNames()

Dim html As HTMLDocument

Dim objIE As Object
Dim element As IHTMLElement
Dim ie As InternetExplorer 'declared but never used in this procedure
Dim elements As IHTMLElementCollection
Dim result As String 'declared but never used in this procedure

Dim count As Long
Dim erow As Long

'initiating a new instance of Internet Explorer and assigning it to objIE
Set objIE = New InternetExplorer

'make IE browser visible (False would allow IE to run in the background)
objIE.Visible = True

'navigate IE to the storage-unit listing page
objIE.navigate "https://www.allstorageonline.com/storage-units/texas/amarillo/all-storage-hardy-115423/#utm_source=GoogleLocal&utm_medium=WWLocal&utm_campaign=115423"

'spin (yielding to the message loop) until IE is no longer busy and readyState is 4
Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop
count = 0

Set html = objIE.document
Set elements = html.getElementsByClassName("unit_size medium")

For Each element In elements
If element.className = "unit_size medium" Then
'next free row is computed from column 1 of Sheet2
'NOTE(review): the writes below use unqualified Cells, which is not
'necessarily Sheet2 - confirm which sheet is meant to receive the data
erow = Sheet2.Cells(Rows.count, 1).End(xlUp).Offset(1, 0).Row
're-queries the whole document and picks the count-th unit element
Cells(erow, 1) = html.getElementsByClassName("unit_size medium")(count).innerText

'picks the count-th promo element of the whole document, not the promo
'belonging to the current unit (see the note at the top of the procedure)
Cells(erow, 2) = html.getElementsByClassName("promo_offers")(count).innerText
count = count + 1
End If
Next element
End Sub

最佳答案

我会在尝试访问元素时,简单地用 On Error Resume Next 把访问语句包裹起来。输出数组中已经为每个值保留了一个位置,因此如果某个元素不存在,该位置就保持为空。

Option Explicit
'VBE > Tools > References:
' Microsoft Internet Controls
' Scrapes every <li> listing inside the "small_units_accordion_panel" element
' of the storage-unit page and writes one row per listing to Sheet1:
' size, features, promo, in-store rate and web price.
'
' A field that a listing does not contain (for example a unit without a
' promotion) is left empty in the output instead of raising an error.
' If the panel is missing or holds no listings, only the header row is
' written. Internet Explorer is closed on every path that reaches the end
' of the With block.
Public Sub GetData()
    Dim ie As New InternetExplorer, ws As Worksheet
    Dim panel As Object, listings As Object, listing As Object
    Dim headers(), results(), r As Long

    Set ws = ThisWorkbook.Worksheets("Sheet1")
    headers = Array("size", "features", "promo", "in store", "web")

    With ie
        .Visible = True
        .Navigate2 "https://www.allstorageonline.com/storage-units/texas/amarillo/all-storage-hardy-115423/#utm_source=GoogleLocal&utm_medium=WWLocal&utm_campaign=115423"

        ' Yield to the message loop until the page has finished loading.
        While .Busy Or .readyState < 4: DoEvents: Wend

        ' Resolve the container first: getElementById yields Nothing when the
        ' id is absent, and chaining a call on Nothing would raise error 91.
        Set panel = .document.getElementById("small_units_accordion_panel")
        If Not panel Is Nothing Then Set listings = panel.getElementsByTagName("li")

        ws.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers

        ' VBA's And does not short-circuit, hence the nested Ifs.
        If Not listings Is Nothing Then
            ' ReDim with bounds "1 To 0" is a run-time error, so skip the
            ' whole fill/write step when there is nothing to report.
            If listings.Length > 0 Then
                ReDim results(1 To listings.Length, 1 To UBound(headers) + 1)

                For Each listing In listings
                    r = r + 1
                    ' Deliberate best-effort: each slot is already reserved in
                    ' results, so a lookup that finds no element simply leaves
                    ' its slot Empty. Error handling is restored right after.
                    On Error Resume Next
                    results(r, 1) = listing.getElementsByClassName("unit_size medium")(0).innerText
                    results(r, 2) = listing.getElementsByClassName("features")(0).innerText
                    results(r, 3) = listing.getElementsByClassName("promo_offers")(0).innerText
                    results(r, 4) = listing.getElementsByClassName("board_rate")(0).innerText
                    results(r, 5) = listing.getElementsByClassName("price")(0).innerText
                    On Error GoTo 0
                Next

                ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
            End If
        End If

        .Quit
    End With
End Sub

抓取所有区块(遍历每个 main_unit,而不只是 small units 面板):
Option Explicit

'VBE > Tools > References:
' Microsoft Internet Controls
' Scrapes every <li> listing inside every "main_unit" element of the
' storage-unit page and writes one row per listing to Sheet1:
' size, features, promo, in-store rate and web price.
'
' A field that a listing does not contain (for example a unit without a
' promotion) is left empty in the output instead of raising an error.
' If no listings are found, only the header row is written. Internet
' Explorer is closed on every path that reaches the end of the With block.
Public Sub GetData()
    Dim ie As New InternetExplorer, ws As Worksheet
    Dim sections As Object, section As Object, entry As Object
    Dim headers(), results()
    Dim r As Long, rowCount As Long

    Set ws = ThisWorkbook.Worksheets("Sheet1")
    headers = Array("size", "features", "promo", "in store", "web")

    With ie
        .Visible = True
        .Navigate2 "https://www.allstorageonline.com/storage-units/texas/amarillo/all-storage-hardy-115423/#utm_source=GoogleLocal&utm_medium=WWLocal&utm_campaign=115423"

        ' Yield to the message loop until the page has finished loading.
        While .Busy Or .readyState < 4: DoEvents: Wend

        Set sections = .document.getElementsByClassName("main_unit")

        ' Size the output with the same traversal that fills it below, so the
        ' row counter can never run past the array bounds (an overflow would
        ' otherwise be silently discarded by On Error Resume Next).
        For Each section In sections
            rowCount = rowCount + section.getElementsByTagName("li").Length
        Next

        ws.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers

        ' ReDim with bounds "1 To 0" is a run-time error, so skip the
        ' fill/write step when there is nothing to report.
        If rowCount > 0 Then
            ReDim results(1 To rowCount, 1 To UBound(headers) + 1)

            For Each section In sections
                For Each entry In section.getElementsByTagName("li")
                    r = r + 1
                    ' Deliberate best-effort: each slot is already reserved in
                    ' results, so a lookup that finds no element simply leaves
                    ' its slot Empty. Error handling is restored right after.
                    On Error Resume Next
                    results(r, 1) = entry.getElementsByClassName("unit_size medium")(0).innerText
                    results(r, 2) = entry.getElementsByClassName("features")(0).innerText
                    results(r, 3) = entry.getElementsByClassName("promo_offers")(0).innerText
                    results(r, 4) = entry.getElementsByClassName("board_rate")(0).innerText
                    results(r, 5) = entry.getElementsByClassName("price")(0).innerText
                    On Error GoTo 0
                Next
            Next

            ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
        End If

        .Quit
    End With
End Sub

关于html - 按类名的数据抓取元素,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/54822598/

24 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com