gpt4 book ai didi

go - 抓取数据时拒绝连接

转载 作者:行者123 更新时间:2023-12-01 22:07:20 25 4
gpt4 key购买 nike

我试图从 Indeed.co.in 网站逐个抓取职位信息。但是,当我访问某个具体的职位类别页面并尝试获取该页面的响应时,连接被拒绝,并出现下面的错误。
我自己尝试过解决这个问题,但没有成功。
请帮忙。

2020/03/07 09:08:41 Error to Connect with Indeed Jobs Category Page. Get https://indeed.co.in/browsejobs/Engineering: dial tcp 169.44.165.69:443: connect: connection refused
package main
import (
	"bytes"
	"crypto/tls"
	"fmt"
	"io/ioutil"
	"log"
	"net/http"

	"github.com/PuerkitoBio/goquery"
)
// GetBrowseJobs fetches the page at Url and invokes processElement for every
// element matching the selector "a.icl-GlobalFooter-link".
//
// A failed request or a non-200 response is logged and the function returns
// without visiting any link. A failure while parsing the response body
// terminates the program through log.Fatalf.
func GetBrowseJobs(Url string) {
	response, err := http.Get(Url)
	if err != nil {
		log.Println("Error to Connect with Indeed Home page.", err)
		return
	}
	defer response.Body.Close()

	// An error page still parses as HTML, so without this check a failed
	// request would look like a page that simply has no matching links.
	if response.StatusCode != http.StatusOK {
		log.Println("Error to Connect with Indeed Home page. Unexpected status:", response.Status)
		return
	}

	document, err := goquery.NewDocumentFromReader(response.Body)
	if err != nil {
		// Fatalf exits the process, so no return is needed after it. The
		// explicit format keeps the message and the error text separated.
		log.Fatalf("Error loading HTTP response body: %v", err)
	}
	document.Find("a.icl-GlobalFooter-link").Each(processElement)
}
// processElement is the goquery Each callback used by GetBrowseJobs. When the
// selected element carries an href attribute, its value is handed to
// BrowseJobsPage; elements without one are skipped.
func processElement(index int, element *goquery.Selection) {
	if link, ok := element.Attr("href"); ok {
		BrowseJobsPage(link)
	}
}
// BrowseJobsPage prints Urls, fetches that page and invokes Processjobs for
// every element matching "table#categories tbody tr td a". A separator line
// is printed once all matches have been processed.
//
// A failed request or a non-200 response is logged and the function returns
// early. A failure while parsing the response body terminates the program
// through log.Fatalf.
func BrowseJobsPage(Urls string) {
	fmt.Println(Urls)

	response, err := http.Get(Urls)
	if err != nil {
		log.Println("Error to Connect with Indeed Browse Jobs Page.", err)
		return
	}
	defer response.Body.Close()

	// An error page still parses as HTML, so without this check a failed
	// request would look like a page that simply has no categories.
	if response.StatusCode != http.StatusOK {
		log.Println("Error to Connect with Indeed Browse Jobs Page. Unexpected status:", response.Status)
		return
	}

	document, err := goquery.NewDocumentFromReader(response.Body)
	if err != nil {
		// Fatalf exits the process, so no return is needed after it. The
		// explicit format keeps the message and the error text separated.
		log.Fatalf("Error loading HTTP response body: %v", err)
	}
	document.Find("table#categories tbody tr td a").Each(Processjobs)
	fmt.Println("***********************************************************************")
}
// Processjobs is the goquery Each callback used by BrowseJobsPage. When the
// selected element carries an href attribute, its value is handed to
// PerJobsTitlePage; elements without one are skipped.
func Processjobs(index int, element *goquery.Selection) {
	if link, ok := element.Attr("href"); ok {
		PerJobsTitlePage(link)
	}
}

// jobsClient is the HTTP client shared by every PerJobsTitlePage call, so the
// transport's connection pool is reused instead of a fresh transport being
// created per request. Certificate verification is left enabled.
var jobsClient = &http.Client{
	Transport: &http.Transport{
		TLSClientConfig: &tls.Config{MinVersion: tls.VersionTLS12},
	},
}

// PerJobsTitlePage prints Urls, fetches "https://www.indeed.co.in" + Urls and
// invokes ProcessSinglejob for every element matching
// "table#titles tbody tr td p.job a". A separator line is printed once all
// matches have been processed.
//
// Urls is appended to the site origin as-is, so it is expected to be a
// site-relative path starting with "/".
//
// A failed request, a non-200 response or a failure while reading the body is
// logged and the function returns early. A failure while parsing the body
// terminates the program through log.Fatalf.
func PerJobsTitlePage(Urls string) {
	fmt.Println(Urls)

	// The "www." host is required: requests to the bare indeed.co.in host
	// were refused with "connection refused".
	response, err := jobsClient.Get("https://www.indeed.co.in" + Urls)
	if err != nil {
		log.Println("Error to Connect with Indeed Jobs Category Page.", err)
		return
	}
	defer response.Body.Close()

	// An error page still parses as HTML, so without this check a failed
	// request would look like a page that simply has no job titles.
	if response.StatusCode != http.StatusOK {
		log.Println("Error to Connect with Indeed Jobs Category Page. Unexpected status:", response.Status)
		return
	}

	body, err := ioutil.ReadAll(response.Body)
	if err != nil {
		log.Println("Error reading Indeed Jobs Category Page response.", err)
		return
	}

	// response.Body is fully consumed by ReadAll, so the document has to be
	// parsed from the buffered bytes rather than from the body again.
	document, err := goquery.NewDocumentFromReader(bytes.NewReader(body))
	if err != nil {
		// Fatalf exits the process, so no return is needed after it. The
		// explicit format keeps the message and the error text separated.
		log.Fatalf("Error loading HTTP response body: %v", err)
	}
	document.Find("table#titles tbody tr td p.job a").Each(ProcessSinglejob)
	fmt.Println("***********************************************************************")
}
// ProcessSinglejob is the goquery Each callback used by PerJobsTitlePage. It
// prints the value of the selected element's title attribute; elements
// without that attribute produce no output.
func ProcessSinglejob(index int, element *goquery.Selection) {
	if title, ok := element.Attr("title"); ok {
		fmt.Println(title)
	}
}
// main starts the crawl by passing the start URL to GetBrowseJobs.
func main() {
	const startURL = "https://www.indeed.co.in/"
	GetBrowseJobs(startURL)
}

最佳答案

在下面这一行的主机名前加上 www.:

  response, err := client.Get("https://indeed.co.in" + Urls)

即改为:
  response, err := client.Get("https://www.indeed.co.in" + Urls)

关于go - 抓取数据时拒绝连接,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/60574257/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com