gpt4 book ai didi

python - 无法在 python 中加载 json 文件

转载 作者:行者123 更新时间:2023-12-01 04:36:22 27 4
gpt4 key购买 nike

我得到了一个 json 格式的 Twitter 流数据文件。现在我尝试在 python 中加载它:

import json

def load_tweets(path):
    """Parse a file containing one JSON document per line.

    Lines that are not valid JSON (including blank lines) are skipped
    silently, mirroring the best-effort behaviour of the original script.

    Args:
        path: Path of the text file to read.

    Returns:
        A list with one decoded object per successfully parsed line, in
        file order.
    """
    tweets = []
    # ``with`` guarantees the file handle is closed, even on error.
    with open(path, "r", encoding="utf-8") as tweets_file:
        for line in tweets_file:
            try:
                # ``json.loads`` decodes a string; ``json.load`` expects a
                # file-like object and would raise for every line here.
                tweets.append(json.loads(line))
            except ValueError:
                # json.JSONDecodeError is a ValueError subclass; catching
                # only it keeps unrelated bugs from being swallowed.
                continue
    return tweets


if __name__ == "__main__":
    tweets_data = load_tweets('test1.txt')
    print(len(tweets_data))

结果始终为 0。如果删除“try”和“except”,则会报错“ValueError: Expecting value: line 2 column 1 (char 1)”。但是,根据在线验证器,文件的每一行都是有效的 JSON。

这是 test1.txt 的一部分:

{"created_at":"Fri Jul 24 16:35:22 +0000 2015","id":624618886277640192,"id_str":"624618886277640192","text":"RT @nodenow: Essential Steps: Long Term Support for Node.js\nhttp:\/\/t.co\/MzPfvenwtT\n+1 micshasan #javascript","source":"\u003ca href=\"http:\/\/twitter.com\/download\/android\" rel=\"nofollow\"\u003eTwitter for Android\u003c\/a\u003e","truncated":false,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":3290861609,"id_str":"3290861609","name":"Rajiin","screen_name":"Rajiin_07","location":"Pokhara city","url":"http:\/\/www.pokharacity.com","description":null,"protected":false,"verified":false,"followers_count":1101,"friends_count":1119,"listed_count":155,"favourites_count":2048,"statuses_count":5498,"created_at":"Wed May 20 04:58:23 +0000 2015","utc_offset":-25200,"time_zone":"Pacific Time (US & Canada)","geo_enabled":true,"lang":"en","contributors_enabled":false,"is_translator":false,"profile_background_color":"000000","profile_background_image_url":"http:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","profile_background_image_url_https":"https:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","profile_background_tile":false,"profile_link_color":"4A913C","profile_sidebar_border_color":"000000","profile_sidebar_fill_color":"000000","profile_text_color":"000000","profile_use_background_image":false,"profile_image_url":"http:\/\/pbs.twimg.com\/profile_images\/617620457336893440\/3HTEKnMx_normal.jpg","profile_image_url_https":"https:\/\/pbs.twimg.com\/profile_images\/617620457336893440\/3HTEKnMx_normal.jpg","profile_banner_url":"https:\/\/pbs.twimg.com\/profile_banners\/3290861609\/1435854327","default_profile":false,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null},"geo":null,"coordinates":null,"place":null,"contributors":null,"retweeted_status":{"created_at":"Fri Jul 24 16:33:04 +0000 
2015","id":624618308050915328,"id_str":"624618308050915328","text":"Essential Steps: Long Term Support for Node.js\nhttp:\/\/t.co\/MzPfvenwtT\n+1 micshasan #javascript","source":"\u003ca href=\"http:\/\/ifttt.com\" rel=\"nofollow\"\u003eIFTTT\u003c\/a\u003e","truncated":false,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":3243544179,"id_str":"3243544179","name":"Javascript Digest","screen_name":"nodenow","location":"","url":null,"description":null,"protected":false,"verified":false,"followers_count":1238,"friends_count":1,"listed_count":1148,"favourites_count":2,"statuses_count":130923,"created_at":"Sat May 09 15:45:13 +0000 2015","utc_offset":null,"time_zone":null,"geo_enabled":false,"lang":"en","contributors_enabled":false,"is_translator":false,"profile_background_color":"C0DEED","profile_background_image_url":"http:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","profile_background_image_url_https":"https:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","profile_background_tile":false,"profile_link_color":"0084B4","profile_sidebar_border_color":"C0DEED","profile_sidebar_fill_color":"DDEEF6","profile_text_color":"333333","profile_use_background_image":true,"profile_image_url":"http:\/\/pbs.twimg.com\/profile_images\/597066594334941184\/Xe4tTtU8_normal.jpg","profile_image_url_https":"https:\/\/pbs.twimg.com\/profile_images\/597066594334941184\/Xe4tTtU8_normal.jpg","default_profile":true,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null},"geo":null,"coordinates":null,"place":null,"contributors":null,"retweet_count":1,"favorite_count":0,"entities":{"hashtags":[{"text":"javascript","indices":[83,94]}],"trends":[],"urls":[{"url":"http:\/\/t.co\/MzPfvenwtT","expanded_url":"http:\/\/bit.ly\/1LH81ly","display_url":"bit.ly\/1LH81ly","indices":[47,69]}],"user_mentions":[],"symbols":[]},"favorited":false,"retwe
eted":false,"possibly_sensitive":false,"filter_level":"low","lang":"en"},"retweet_count":0,"favorite_count":0,"entities":{"hashtags":[{"text":"javascript","indices":[96,107]}],"trends":[],"urls":[{"url":"http:\/\/t.co\/MzPfvenwtT","expanded_url":"http:\/\/bit.ly\/1LH81ly","display_url":"bit.ly\/1LH81ly","indices":[60,82]}],"user_mentions":[{"screen_name":"nodenow","name":"Javascript Digest","id":3243544179,"id_str":"3243544179","indices":[3,11]}],"symbols":[]},"favorited":false,"retweeted":false,"possibly_sensitive":false,"filter_level":"low","lang":"en","timestamp_ms":"1437755722003"}


{"created_at":"Fri Jul 24 16:35:22 +0000 2015","id":624618888387432449,"id_str":"624618888387432449","text":"python \u041c\u043e\u0441\u043a\u0432\u0430 http:\/\/t.co\/itYJmgVvgD","source":"\u003ca href=\"http:\/\/gdepraktika.ru\" rel=\"nofollow\"\u003egdepraktika-trfnslator\u003c\/a\u003e","truncated":false,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":623605809,"id_str":"623605809","name":"\u0413\u0434\u0435 \u043f\u0440\u0430\u043a\u0442\u0438\u043a\u0430?","screen_name":"gdepraktika","location":"\u0420\u043e\u0441\u0441\u0438\u044f","url":"http:\/\/gdepraktika.ru","description":"\u041f\u0440\u0430\u043a\u0442\u0438\u043a\u0430, \u0441\u0442\u0430\u0436\u0438\u0440\u043e\u0432\u043a\u0430, \u0440\u0430\u0431\u043e\u0442\u0430 \u0434\u043b\u044f \u0441\u0442\u0443\u0434\u0435\u043d\u0442\u043e\u0432, \u043e\u0431\u0443\u0447\u0435\u043d\u0438\u0435 \u0432 \u043a\u043e\u043c\u043f\u0430\u043d\u0438\u044f\u0445","protected":false,"verified":false,"followers_count":17,"friends_count":9,"listed_count":0,"favourites_count":0,"statuses_count":902069,"created_at":"Sun Jul 01 07:53:36 +0000 
2012","utc_offset":10800,"time_zone":"Moscow","geo_enabled":false,"lang":"ru","contributors_enabled":false,"is_translator":false,"profile_background_color":"C0DEED","profile_background_image_url":"http:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","profile_background_image_url_https":"https:\/\/abs.twimg.com\/images\/themes\/theme1\/bg.png","profile_background_tile":false,"profile_link_color":"0084B4","profile_sidebar_border_color":"C0DEED","profile_sidebar_fill_color":"DDEEF6","profile_text_color":"333333","profile_use_background_image":true,"profile_image_url":"http:\/\/pbs.twimg.com\/profile_images\/378800000420815111\/bba61a6dcd4272794a4af41dd8a44cf5_normal.png","profile_image_url_https":"https:\/\/pbs.twimg.com\/profile_images\/378800000420815111\/bba61a6dcd4272794a4af41dd8a44cf5_normal.png","default_profile":true,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null},"geo":null,"coordinates":null,"place":null,"contributors":null,"retweet_count":0,"favorite_count":0,"entities":{"hashtags":[],"trends":[],"urls":[{"url":"http:\/\/t.co\/itYJmgVvgD","expanded_url":"http:\/\/bit.ly\/1GqpqOg","display_url":"bit.ly\/1GqpqOg","indices":[15,37]}],"user_mentions":[],"symbols":[]},"favorited":false,"retweeted":false,"possibly_sensitive":false,"filter_level":"low","lang":"und","timestamp_ms":"1437755722506"}

最佳答案

发生这种情况是因为有效的 json 行之间有两个空行。只需添加一个空行检查就可以了。

import json
def parse_json_lines(path):
    """Parse a file of newline-delimited JSON, tolerating blank lines.

    Blank (whitespace-only) lines are ignored. Non-blank lines that fail
    to decode are collected instead of being dropped, so the caller can
    see how much input was rejected.

    Args:
        path: Path of the text file to read.

    Returns:
        A ``(parsed, not_parsed)`` tuple: the decoded objects in file
        order, and the raw lines that could not be decoded.
    """
    parsed = []
    not_parsed = []
    # ``with`` guarantees the file handle is closed, even on error.
    with open(path, "r", encoding="utf-8") as tweets_file:
        for line in tweets_file:
            if not line.strip():
                # Separator lines between records carry no data.
                continue
            try:
                # ``json.loads`` decodes a string; ``json.load`` expects a
                # file-like object and would fail on every line.
                record = json.loads(line)
            except ValueError:
                # json.JSONDecodeError is a ValueError subclass.
                not_parsed.append(line)
            else:
                parsed.append(record)
    return parsed, not_parsed


if __name__ == "__main__":
    tweets_data, notParsed = parse_json_lines('test1.txt')
    print(len(tweets_data))
    print('Could not parse: ', len(notParsed))

这并非必需,我只是借您的回答复习一下 Python,不过您也可以把代码改写成下面这样:

# Build the list eagerly: on Python 3 ``map`` returns a lazy iterator, so a
# bare ``map(...)`` call parses nothing. ``with`` also closes the file.
with open('test1.txt') as tweets_file:
    # Skip blank separator lines; decode every remaining line as JSON.
    tweets_data = [json.loads(line) for line in tweets_file if line.strip()]

关于python - 无法在 python 中加载 json 文件,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/31620163/

27 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com