gpt4 book ai didi

python - 使用 python 3 抓取 Json

转载 作者:行者123 更新时间:2023-12-01 09:27:03 24 4
gpt4 key购买 nike

这是脚本:

from bs4 import BeautifulSoup as bs4
import requests
import json
from lxml import html
from pprint import pprint

import re


def get_data():

url = 'https://sports.bovada.lv//baseball/mlb/game-lines-market-group'
r = requests.get(url, headers={"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.103 Safari/537.36"})
html_bytes = r.text
soup = bs4(html_bytes, 'lxml')

# res = soup.findAll('script') # find all scripts..

pattern = re.compile(r"swc_market_lists\s+=\s+(\{.*?\})")
script = soup.find("script", text=pattern)

return script.text[23:]

test1 = get_data()

data = json.loads(test1)


for item1 in data['items']:
data1 = item1['itemList']['items']
for item2 in data1:
pitch_a = item2['opponentAName']
pitch_b = item2['opponentBName']
## group = item2['displayGroups']
## for item3 in group:
## new_il = item3['itemList']
## for item4 in new_il:
## market = item4['description']
## oc = item4['outcomes']
print(pitch_a,pitch_b)


##for items in data['items']:
## pos = items['itemList']['items']
## for item in pos:
## work = item['competitors']
## pitcher_a = item['opponentAName']
## pitcher_b = item['opponentBName']
## group = item['displayGroups']
## for item, item2 in zip(work,group):
## team = item['abbreviation']
## place = item['type']
## il2 = item2['itemList']
## for item in il2:
## ml = item['description']

## print(team,place,pitcher_a,pitcher_b,ml)

我一直在努力抓取

团队缩写 = ['items']['itemList']['items']['competitors']['abbreviation']

home_away = ['items']['itemList']['items']['competitors']['type']

球队投手主页 = ['items']['itemList']['items']['opponentAName']

球队投手离开 = ['items']['itemList']['items']['opponentBName']

美国赔率赔率 = ['items']['itemList']['items']['displayGroups']['itemList']['outcomes']['price ']['美国']

总运行次数 = ['items']['itemList']['items']['displayGroups']['itemList']['outcomes']['price' ]['障碍']

部分Json pprinted:

[{'baseLink': '/baseball/mlb/game-lines-market-group',
'defaultType': True,
'description': 'Game Lines',
'id': '136',
'itemList': {'items': [{'LIVE': True,
'atmosphereLink': '/api/atmosphere/eventNotification/events/A/3149961',
'awayTeamFirst': True,
'baseLink': '/baseball/mlb/minnesota-twins-los-angeles-angels-201805112207',
'competitionId': '24736',
'competitors': [{'abbreviation': 'LAA',
'description': 'Los Angeles Angels',
'id': '3149961-1642',
'rotationNumber': '978',
'shortName': 'Angels',
'type': 'HOME'},
{'abbreviation': 'MIN',
'description': 'Minnesota Twins',
'id': '3149961-9990',
'rotationNumber': '977',
'shortName': 'Twins',
'type': 'AWAY'}],
'denySameGame': 'NO',
'description': 'Minnesota Twins @ Los Angeles Angels',
'displayGroups': [{'baseLink': '/baseball/mlb/game-lines-market-group',
'defaultType': True,
'description': 'Game Lines',
'id': '136',
'itemList': [{'belongsToDefault': True,
'columns': 'H2Columns',
'description': 'Moneyline',
'displayGroups': '136,A-136',
'id': '46892277',
'isInRunning': True,
'mainMarketType': 'MONEYLINE',
'mainPeriod': True,
'marketTypeGroup': 'MONEY_LINE',
'notes': '',
'outcomes': [{'competitorId': '3149961-9990',
'description': 'Minnesota '
'Twins',
'id': '211933276',
'price': {'american': '-475',
'decimal': '1.210526',
'fractional': '4/19',
'id': '1033002124',
'outcomeId': '211933276'},
'status': 'OPEN',
'type': 'A'},
{'competitorId': '3149961-1642',
'description': 'Los '
'Angeles '
'Angels',
'id': '211933277',
'price': {'american': '+310',
'decimal': '4.100',
'fractional': '31/10',
'id': '1033005679',
'outcomeId': '211933277'},
'status': 'OPEN',
'type': 'H'}],
'periodType': 'Live '
'Match',
'sequence': '14',
'sportCode': 'BASE',
'status': 'OPEN',
'type': 'WW'},
{'belongsToDefault': True,
'columns': 'H2Columns',
'description': 'Runline',
'displayGroups': '136,A-136',
'id': '46892287',
'isInRunning': True,
'mainMarketType': 'SPREAD',
'mainPeriod': True,
'marketTypeGroup': 'SPREAD',
'notes': '',
'outcomes': [{'competitorId': '3149961-9990',
'description': 'Minnesota '
'Twins',
'id': '211933278',
'price': {'american': '+800',
'decimal': '9.00',
'fractional': '8/1',
'handicap': '-1.5',
'id': '1033005677',
'outcomeId': '211933278'},
'status': 'OPEN',
'type': 'A'},
{'competitorId': '3149961-1642',
'description': 'Los '
'Angeles '
'Angels',
'id': '211933279',
'price': {'american': '-2000',
'decimal': '1.050',
'fractional': '1/20',
'handicap': '1.5',
'id': '1033005678',
'outcomeId': '211933279'},
'status': 'OPEN',
'type': 'H'}],
'periodType': 'Live '
'Match',
'sequence': '14',
'sportCode': 'BASE',
'status': 'OPEN',
'type': 'SPR'}],
'link': '/baseball/mlb/game-lines-market-group'}],
'feedCode': '13625145',
'id': '3149961',
'link': '/baseball/mlb/minnesota-twins-los-angeles-angels-201805112207',
'notes': '',
'numMarkets': 2,
'opponentAId': '214704',
'opponentAName': 'Tyler Skaggs (L)',
'opponentBId': '215550',
'opponentBName': 'Lance Lynn (R)',
'sport': 'BASE',
'startTime': 1526090820000,
'status': 'O',
'type': 'MLB'},

我在上面的脚本中启动了几个不同的循环,但它们中的任何一个都按照我想要的方式工作。

客队|客钱线|客场投手 |总运行次数 |我最终希望主队重复这一点。一旦以正确的方式解析了 csv,我就可以写入它。

感谢您的新视角,我花了一天的大部分时间来研究这个问题,试图找出访问我想要的内容的最佳方式。如果 Json 不是最好的方法,而 bs4 效果更好,我很想听听您的意见

最佳答案

您的问题没有简单的答案。抓取数据需要您仔细评估您正在处理的数据,找出您想要提取的部分所在的位置,并弄清楚如何有效地存储您提取的数据。

尝试打印循环中的数据以可视化代码中发生的情况(或尝试调试)。如果您迭代您的期望,从那里很容易找出它。在整个输入数据中查找模式,以帮助组织您提取的数据。

为了帮助自己,您应该为变量提供描述性名称,将代码分成逻辑 block ,并在代码开始变得复杂时添加注释。

这里有一些工作代码,但我鼓励您尝试我上面告诉您的内容,如果您仍然遇到困难,请查看下面的指导。

output = {}
root = data['items'][0]

for game_line in root['itemList']['items']:
# Create a temporary dict to store the data for this gameline
team_data = {}

# Get competitors
competitors = game_line['competitors']
for team in competitors:
team_type = team['type'] # either HOME or AWAY
# Create a new dict to store data for each team
team_data[team_type] = {}
team_data[team_type]['abbreviation'] = team['abbreviation']
team_data[team_type]['name'] = team['description']


# Get MoneyLine and Total Runs
for item in game_line['displayGroups'][0]['itemList']:
for outcome in item['outcomes']:
team_type = outcome['type'] # either A or H
team_type = 'HOME' if team_type == 'H' else 'AWAY'

if item['mainMarketType'] == 'MONEYLINE':
team_data[team_type]['moneyline'] = outcome['price']['american']
elif item['mainMarketType'] == 'SPREAD':
team_data[team_type]['total runs'] = outcome['price']['handicap']

# Get the pitchers
team_data['HOME']['pitcher'] = game_line['opponentAName']
team_data['AWAY']['pitcher'] = game_line['opponentBName']

# For each gameline, add the teamdata we gathered to the output dict
output[game_line['description']] = team_data

这会产生类似:

{
'Atlanta Braves @ Miami Marlins': {
'AWAY': {
'abbreviation': 'ATL',
'moneyline': '-130',
'name': 'Atlanta Braves',
'pitcher': 'Mike Soroka (R)',
'total runs': '-1.5'
},
'HOME': {
'abbreviation': 'MIA',
'moneyline': '+110',
'name': 'Miami Marlins',
'pitcher': 'Jarlin Garcia (L)',
'total runs': '1.5'
}
},
'Boston Red Sox @ Toronto Blue Jays': {
'AWAY': {
'abbreviation': 'BOS',
'moneyline': '-133',
'name': 'Boston Red Sox',
'pitcher': 'David Price (L)',
'total runs': '-1.5'
},
'HOME': {
'abbreviation': 'TOR',
'moneyline': '+113',
'name': 'Toronto Blue Jays',
'pitcher': 'Marco Estrada (R)',
'total runs': '1.5'
}
},

}

关于python - 使用 python 3 抓取 Json,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/50303405/

24 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com