gpt4 book ai didi

python - 输出文件留下几个月没有天气数据的数据

转载 作者:太空宇宙 更新时间:2023-11-03 16:35:19 24 4
gpt4 key购买 nike

我正在尝试从天气网站 wunderground.com 抓取数据。我希望 1941-2016 年每个月(一月至十二月)都从费城发出。

起初我有这段代码,但这只是在 2016 年 1 月抓取并制作了一个文件。

#!/usr/bin/python
#weather.scraper

from bs4 import BeautifulSoup
import urllib
import json

def main():
# weatherData = weather_philadelphia_data #Json beginns here
# with open(jsonfile, 'w') as outputFile:
# json.dump(weatherData, outputFile)
# #scrapping beginns here
r = urllib.urlopen("https://www.wunderground.com/history/airport/KPHL/2016/1/1/MonthlyHistory.html?&reqdb.zip=&reqdb.magic=&reqdb.wmo=&MR=1").read()
soup = BeautifulSoup(r, "html.parser")
tables = soup.find_all("table", class_="responsive airport-history-summary-table")

weatherdata = []
for table in tables: #reason for it to do it 12x

for tr in table.find_all("tr"):
firstTd = tr.find("td")
if firstTd and firstTd.has_attr("class") and "indent" in firstTd['class']:
values = {}
tds = tr.find_all("td")
maxVal = tds[1].find("span", class_="wx-value")
avgVal = tds[2].find("span", class_="wx-value")
minVal = tds[3].find("span", class_="wx-value")
if maxVal:
values['max'] = maxVal.text
if avgVal:
values['avg'] = avgVal.text
if minVal:
values['min'] = minVal.text
if len(tds) > 4:
sumVal = tds[4].find("span", class_="wx-value")
if sumVal:
values['sum'] = sumVal.text
scrapedData = {}
scrapedData[firstTd.text] = values
weatherdata.append(scrapedData)
break
with open ("january_2016.json", 'w' ) as outFile:
json.dump(weatherdata, outFile, indent=2)


print "done"
if __name__ == "__main__":
main()

我尝试制作一个循环遍历所有年份和月份的 for 循环。它创建了文件,但里面没有数据,只显示年份。这是新代码:

#!/usr/bin/python
#weather.scraper
from bs4 import BeautifulSoup
import urllib
import json

allData = []
# this loops through all the Weather years
for y in range(1941, 2017):
yearData = {}
yearData['year'] = y
months = []
for m in range(1, 13):
def main():
# weatherData = weather_philadelphia_data #Json beginns here
# with open(jsonfile, 'w') as outputFile:
# json.dump(weatherData, outputFile)
# scrapping beginns here
url = "https://www.wunderground.com/history/airport/KPHL/%d/%d/1/MonthlyHistory.html" % (y, m)
r = urllib.urlopen(url).read()
soup = BeautifulSoup(r, "html.parser")
tables = soup.find_all("table", class_="responsive airport-history-summary-table")

weatherPerMonth = {}
weatherdata = []
for table in tables: #reason for it to do it 12x

for tr in table.find_all("tr"):
firstTd = tr.find("td")
if firstTd and firstTd.has_attr("class") and "indent" in firstTd['class']:
values = {}
tds = tr.find_all("td")
maxVal = tds[1].find("span", class_="wx-value")
avgVal = tds[2].find("span", class_="wx-value")
minVal = tds[3].find("span", class_="wx-value")
if maxVal:
values['max'] = maxVal.text
if avgVal:
values['avg'] = avgVal.text
if minVal:
values['min'] = minVal.text
if len(tds) > 4:
sumVal = tds[4].find("span", class_="wx-value")
if sumVal:
values['sum'] = sumVal.text
scrapedData = {}
scrapedData[firstTd.text] = values
weatherdata.append(scrapedData)
break
monthData = {}
monthData['month'] = m
monthData['weather'] = weatherPerMonth
months.append(monthData)
yearData['months'] = months
allData.append(yearData)

with open ("allData_philly.json", 'w' ) as outFile:
json.dump(allData, outFile, indent=2)


print "done"
if __name__ == "__main__":
main()

这是它生成的输出文件的一部分。

[  
{
"months": [],
"year": 1941
},
]

直到2016年都是这样。

问题如下。我想要一个文件,其中包含 1941-2016 年 12 个月(一月至十二月)的天气数据,它应该如下所示:

[
{
"months": [{
'month': 12
'weather' : {
"Max Temperature": {
"max": "18",
"avg": "6",
"min": "-2"
}
},
{
"Mean Temperature": {
"max": "12",
"avg": "1",
"min": "-6"
}
},
{
"Min Temperature": {
"max": "6",
"avg": "-3",
"min": "-11"
}

}],
"year": 1941
},
]

但我不明白为什么我的代码不起作用,我希望有人能提供帮助!

最佳答案

您的代码看起来不错,只是有一些小问题阻止您获得正确的输出。

  • def main(): 位于循环内部,因此当您调用 main() 时,它不会循环所有年份。在您的第一个示例中看起来不错。
  • weatherPerMonth 声明为空列表,然后将其分配给 monthData['weather']。您的实际数据位于 weatherdata 中,但它永远不会写入任何地方。
  • 下面的代码只是对您的代码进行了较小的修改,进行了一些重新排列和缩进更改,但它应该会为您提供所需的输出。
<小时/>
#weather.scraper
from bs4 import BeautifulSoup
import urllib.request
import json

allData = []
# this loops through all the Weather years
for y in range(2012, 2014):
yearData = {}
yearData['year'] = y
months = []
for m in range(1, 13):
# weatherData = weather_philadelphia_data #Json beginns here
# with open(jsonfile, 'w') as outputFile:
# json.dump(weatherData, outputFile)
# scrapping beginns here
url = "https://www.wunderground.com/history/airport/KPHL/%d/%d/1/MonthlyHistory.html" % (y, m)
r = urllib.request.urlopen(url).read()
soup = BeautifulSoup(r, "html.parser")
tables = soup.find_all("table", class_="responsive airport-history-summary-table")

weatherPerMonth = {}
weatherdata = []

monthData = {}

for table in tables: #reason for it to do it 12x

for tr in table.find_all("tr"):
firstTd = tr.find("td")
if firstTd and firstTd.has_attr("class") and "indent" in firstTd['class']:
values = {}
tds = tr.find_all("td")

maxVal = tds[1].find("span", class_="wx-value")
avgVal = tds[2].find("span", class_="wx-value")
minVal = tds[3].find("span", class_="wx-value")
if maxVal:
values['max'] = maxVal.text
if avgVal:
values['avg'] = avgVal.text
if minVal:
values['min'] = minVal.text
if len(tds) > 4:
sumVal = tds[4].find("span", class_="wx-value")
if sumVal:
values['sum'] = sumVal.text
scrapedData = {}
scrapedData[firstTd.text] = values

weatherdata.append(scrapedData)
monthData['month'] = m
monthData['weather'] = values
break


months.append(monthData)
yearData['months'] = months
allData.append(yearData)

with open ("allData_philly.json", 'w' ) as outFile:
json.dump(allData, outFile, indent=2)

关于python - 输出文件留下几个月没有天气数据的数据,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/37280309/

24 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com