gpt4 book ai didi

html - 按标签和类别抓取数据

转载 作者:可可西里 更新时间:2023-11-01 12:54:39 25 4
gpt4 key购买 nike

我正在尝试从网站复制数据,我需要所有范围的尺寸、价格、设施、特价、预订。我在代码下方构建了框架,但我无法复制下面正在运行的元素。遇到很多错误。有人可以调查一下吗?

 Sub gostoreit()

Dim ie As New InternetExplorer, ws As Worksheet
Dim element As IHTMLElement
Set ws = ThisWorkbook.Worksheets("Unit Data")
With ie
.Visible = True
.Navigate2 "" &
"https://www.gostoreit.com/locations/georgia/cumming/go-store-cumming/"

While .Busy Or .readyState < 4: DoEvents: Wend

Dim listings As Object, listing As Object, headers(), results(), r
As Long, c As Long, item As Object
headers = Array("Size", "promo", "Reguler Price", "Online Price", "Listing Active", "features")
Set listings = .document.getElementsByTagName("l-main-container")
ReDim results(1 To listings.Length, 1 To UBound(headers) + 1)
For Each listing In listings

r = r + 1

results(r, 1) = listing.getElementsByClassName("size_txt")(0).innerText 'Size
results(r, 2) = listing.getElementsByClassName("helpDiscounts ls_discountsTitleSmall")(0).innerText 'promo(example. First Month Free)
results(r, 3) = listing.getElementsByClassName("wasPrice")(0).innerText 'reguler price
results(r, 4) = listing.getElementsByClassName("ls_unit_price")(0).innerText 'online price
results(r, 5) = listing.getElementsByClassName("unitSelectButtonRES isRESBut")(0).innerText ' listing active
results(r, 6) = listing.getElementsByClassName("tableUnitType _uSpan")(0).innerText ' features


Next
ws.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
.Quit

End With
End Sub

最佳答案

使用 iframe src 然后处理我们之前讨论过的方式(作为我的偏好),即识别行然后将行 html 转储到代理 HTMLDocument 变量以充分利用 querySelector粒度级别。我忽略了 reserve,因为这显示没有变化,您可以使用默认值自动填充它们。如果需要,可以轻松添加它们。

Option Explicit

'VBE > Tools > References: Microsoft Internet Controls
Public Sub GetData()
Dim ie As Object
Set ie = CreateObject("InternetExplorer.Application")
With ie
.Visible = True
.Navigate2 "https://www.clickandstor.com/CAS_2.5.16/sorter/controller.php?fid=1162&mode=unit-table-p&target=casDiv1&width=100%25&height=100px&js=1&displayId=lsFramer_0&u=https%3A%2F%2Fwww.gostoreit.com%2Flocations%2Fgeorgia%2Fcumming%2Fgo-store-cumming%2F&&v_in=2.5.16&dn=1559990768103&1559990768"

While .Busy Or .readyState < 4: DoEvents: Wend

Dim html2 As HTMLDocument, rows As Object, i As Long, results(), headers()
headers = Array("Size", "Description", "On site price", "Web Price", "Offer")
Set html2 = New HTMLDocument

Do
Set rows = .document.querySelectorAll(".unitRow") '.size_txt")
Loop While rows.Length = 0
ReDim results(1 To rows.Length, 1 To UBound(headers) + 1)
On Error Resume Next
For i = 1 To rows.Length - 1
html2.body.innerHTML = rows.item(i).outerHTML
results(i, 1) = html2.querySelector(".size_txt").innerText
results(i, 2) = GetDescription(html2.querySelectorAll(".unitMoreHelpTitle, .pop_spacer_li"))
results(i, 3) = html2.querySelector(".wasPrice").innerText
results(i, 4) = html2.querySelector(".ls_unit_price").innerText
results(i, 5) = html2.querySelector(".helpDiscounts").innerText
Next
On Error GoTo 0
.Quit
End With
With ThisWorkbook.Worksheets("Sheet1")
.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End With
End Sub

Public Function GetDescription(ByVal nodeList As Object)
Dim i As Long, arr()
ReDim arr(0 To nodeList.Length - 1)
For i = 0 To nodeList.Length - 1
arr(i) = nodeList.item(i).innerText
Next
GetDescription = Join$(arr, Chr$(32))
End Function

如果您想要通过 iframe 进行更详细的方法。我选择导航到 iframe 的 src,但您可以使用 .document.getElementById("lsFramer_0").contentDocument.querySelector 语法访问

Option Explicit

'VBE > Tools > References: Microsoft Internet Controls
Public Sub GetData()
Dim ie As Object
Set ie = CreateObject("InternetExplorer.Application")
With ie
.Visible = True
.Navigate2 "https://www.gostoreit.com/locations/georgia/cumming/go-store-cumming/"
While .Busy Or .readyState < 4: DoEvents: Wend
.Navigate2 .document.querySelector("#lsFramer_0").src
While .Busy Or .readyState < 4: DoEvents: Wend

Dim html2 As HTMLDocument, rows As Object, i As Long, results(), headers()
headers = Array("Size", "Description", "On site price", "Web Price", "Offer")
Set html2 = New HTMLDocument

Do
Set rows = .document.querySelectorAll(".unitRow") '.size_txt")
Loop While rows.Length = 0
ReDim results(1 To rows.Length, 1 To UBound(headers) + 1)
On Error Resume Next
For i = 1 To rows.Length - 1
html2.body.innerHTML = rows.item(i).outerHTML
results(i, 1) = html2.querySelector(".size_txt").innerText
results(i, 2) = GetDescription(html2.querySelectorAll(".unitMoreHelpTitle, .pop_spacer_li"))
results(i, 3) = html2.querySelector(".wasPrice").innerText
results(i, 4) = html2.querySelector(".ls_unit_price").innerText
results(i, 5) = html2.querySelector(".helpDiscounts").innerText
Next
On Error GoTo 0
.Quit
End With
With ThisWorkbook.Worksheets("Sheet1")
.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End With
End Sub

Public Function GetDescription(ByVal nodeList As Object)
Dim i As Long, arr()
ReDim arr(0 To nodeList.Length - 1)
For i = 0 To nodeList.Length - 1
arr(i) = nodeList.item(i).innerText
Next
GetDescription = Join$(arr, Chr$(32))
End Function

关于html - 按标签和类别抓取数据,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/56505229/

25 4 0