gpt4 book ai didi

python - 如何使用 YouTube API v3 和 Python 从视频中获取评论?

转载 作者:行者123 更新时间:2023-11-28 18:33:35 24 4
gpt4 key购买 nike

我一直在尝试使用 Python(作为学习这门语言的练习)从 YouTube 上的给定视频中获取评论(线程和回复)。

根据官方网站(https://developers.google.com/youtube/v3/docs/commentThreads/list)给出的示例,我能够得到一部分评论,但不是全部。我尝试添加了一些代码来处理多页结果,但即使是评论只有一页的视频,我也无法获取到全部评论。

例如,https://www.youtube.com/watch?v=Gd_L7DVKTA8 有 17 条评论(包括回复),但我只能获取到 7 个线程和 2 条回复。有趣的是,我使用上面链接提供的 API Explorer 也得到了相同的结果(只有 7 个线程)。

我的代码如下:

#!/usr/bin/python

# Usage:
# python scraper.py --videoid='<video_id>'

from apiclient.errors import HttpError
from oauth2client.tools import argparser
from apiclient.discovery import build

# Service name and version handed to build() in the script entry point.
YOUTUBE_API_SERVICE_NAME = "youtube"
YOUTUBE_API_VERSION = "v3"
# Passed to build() as developerKey. NOTE(review): 'key' looks like a
# placeholder -- presumably it must be replaced with a real API key.
DEVELOPER_KEY = 'key'


def get_comment_threads(youtube, video_id, comments):
    """Fetch every comment thread of a video, following all result pages.

    As a side effect, the text of each thread's top-level comment is
    appended to ``comments``.

    Args:
        youtube: API client exposing ``commentThreads().list(...).execute()``.
        video_id: ID of the video whose comment threads are listed.
        comments: List that receives the ``textDisplay`` of every top-level
            comment, in the order the API returns them.

    Returns:
        A list with the thread resources (``items``) of every page.
    """
    threads = []
    request_args = {
        "part": "snippet",
        "videoId": video_id,
        "textFormat": "plainText",
        # Largest page size the API accepts; fewer round trips than the
        # default page size.
        "maxResults": 100,
    }

    # One loop handles the first page and all following pages; the only
    # difference between requests is the optional pageToken.
    while True:
        results = youtube.commentThreads().list(**request_args).execute()

        for item in results.get("items", []):
            threads.append(item)
            top_level = item["snippet"]["topLevelComment"]
            comments.append(top_level["snippet"]["textDisplay"])

        if "nextPageToken" not in results:
            break
        request_args["pageToken"] = results["nextPageToken"]

    # Single-argument print() behaves the same on Python 2 and Python 3.
    print("Total threads: %d" % len(threads))

    return threads


def get_comments(youtube, parent_id, comments):
    """Fetch all replies to one top-level comment, following all result pages.

    The original version read only the first page of results, so replies
    beyond that page were silently dropped.

    Args:
        youtube: API client exposing ``comments().list(...).execute()``.
        parent_id: ID of the comment (thread) whose replies are listed.
        comments: List that receives the ``textDisplay`` of every reply.

    Returns:
        A list with the reply resources (``items``) of every page.
    """
    replies = []
    request_args = {
        "part": "snippet",
        "parentId": parent_id,
        "textFormat": "plainText",
        # Largest page size the API accepts; fewer round trips than the
        # default page size.
        "maxResults": 100,
    }

    while True:
        results = youtube.comments().list(**request_args).execute()

        for item in results.get("items", []):
            replies.append(item)
            comments.append(item["snippet"]["textDisplay"])

        # Keep requesting pages until the API stops returning a token.
        if "nextPageToken" not in results:
            break
        request_args["pageToken"] = results["nextPageToken"]

    return replies

if __name__ == "__main__":
    # Script entry point: download all comments (threads and replies) of the
    # video given by --videoid and write them to output.txt, one per line.

    # Local import so this block brings its own dependency; io.open accepts
    # an encoding on both Python 2 and Python 3.
    import io

    argparser.add_argument(
        "--videoid",
        required=True,
        help="Required; ID of the video whose comments will be fetched.",
    )
    args = argparser.parse_args()
    youtube = build(YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_VERSION, developerKey=DEVELOPER_KEY)

    try:
        # The context manager closes the file even when an API call fails.
        with io.open("output.txt", "w", encoding="utf-8") as output_file:
            comments = []
            video_comment_threads = get_comment_threads(youtube, args.videoid, comments)

            # Replies are fetched per thread and appended to the same list.
            for thread in video_comment_threads:
                get_comments(youtube, thread["id"], comments)

            # The file object encodes to UTF-8, so text is written directly
            # instead of being encoded by hand.
            output_file.writelines(comment + u"\n" for comment in comments)

        print("Total comments: %d" % len(comments))

    except HttpError as e:
        print("An HTTP error %d occurred:\n%s" % (e.resp.status, e.content))

提前感谢您的任何建议!

最佳答案

我正在使用这段代码

import os
import pickle
import google.oauth2.credentials
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request

# Client-secrets JSON file loaded by get_authenticated_service(). For more
# information on creating your credentials JSON please visit
# https://python.gotrained.com/youtube-api-extracting-comments/
CLIENT_SECRETS_FILE = "client_secret.json"
# OAuth scopes requested when the authorization flow is created.
SCOPES = ['https://www.googleapis.com/auth/youtube.force-ssl']
# Service name and version handed to build().
API_SERVICE_NAME = 'youtube'
API_VERSION = 'v3'

def get_authenticated_service():
    """Build an authorized API client, caching OAuth credentials on disk.

    Credentials are loaded from ``token.pickle`` when that file exists. If
    they are missing or invalid they are refreshed (when expired and a
    refresh token is available) or obtained through a new authorization
    flow based on ``CLIENT_SECRETS_FILE`` and ``SCOPES``; the result is
    written back to ``token.pickle`` for the next run.

    Returns:
        The client object returned by ``build`` for ``API_SERVICE_NAME`` /
        ``API_VERSION`` with the credentials attached.
    """
    token_path = 'token.pickle'
    credentials = None

    # NOTE(security): unpickling executes whatever the file contains. This is
    # only acceptable because the file is written by this function itself;
    # never point it at a token file from an untrusted source.
    if os.path.exists(token_path):
        with open(token_path, 'rb') as token:
            credentials = pickle.load(token)

    # Check if the credentials are invalid or do not exist
    if not credentials or not credentials.valid:
        # Check if the credentials have expired
        if credentials and credentials.expired and credentials.refresh_token:
            credentials.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                CLIENT_SECRETS_FILE, SCOPES)
            # Newer google-auth-oauthlib releases no longer provide
            # run_console(); keep using it where it exists and otherwise
            # fall back to the local-server flow on a free port.
            run_console = getattr(flow, 'run_console', None)
            if run_console is not None:
                credentials = run_console()
            else:
                credentials = flow.run_local_server(port=0)

        # Save the credentials for the next run
        with open(token_path, 'wb') as token:
            pickle.dump(credentials, token)

    return build(API_SERVICE_NAME, API_VERSION, credentials=credentials)

def get_video_comments(service, **kwargs):
    """Collect the text of every top-level comment matching a query.

    Args:
        service: API client exposing ``commentThreads().list(...).execute()``.
        **kwargs: Request parameters forwarded to ``commentThreads().list``;
            ``pageToken`` is added while walking through result pages.

    Returns:
        A list with the ``textDisplay`` of each top-level comment, in the
        order the pages return them.
    """
    texts = []
    response = service.commentThreads().list(**kwargs).execute()

    while response:
        texts.extend(
            thread['snippet']['topLevelComment']['snippet']['textDisplay']
            for thread in response['items']
        )

        # No token means this was the last page.
        if 'nextPageToken' not in response:
            break

        kwargs['pageToken'] = response['nextPageToken']
        response = service.commentThreads().list(**kwargs).execute()

    return texts


if __name__ == '__main__':
    # Script entry point: authorize, ask for a video id, then print the
    # number of top-level comments followed by the comments themselves.

    # Local runs only: this turns off OAuthlib's HTTPS verification and
    # must *not* stay enabled in production.
    os.environ['OAUTHLIB_INSECURE_TRANSPORT'] = '1'
    service = get_authenticated_service()

    # The id is the "v" query parameter of the watch URL, e.g.
    # https://www.youtube.com/watch?v=vedLpKXzZqE -> vedLpKXzZqE
    video_id = input('Enter Video id : ')
    request_params = {
        'part': 'snippet',
        'videoId': video_id,
        'textFormat': 'plainText',
    }
    comments = get_video_comments(service, **request_params)

    print(len(comments), comments)

祝你好运

关于python - 如何使用 YouTube API v3 和 Python 从视频中获取评论?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/34606055/

24 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com