gpt4 book ai didi

python - 使用 Watson for Python 进行连续实时语音转文本

转载 作者:太空宇宙 更新时间:2023-11-04 04:56:23 25 4
gpt4 key购买 nike

我正在尝试创建一个小型 Python 程序,它可以让我使用我的麦克风从 Watson 服务器实时获取文本,类似于它的工作方式 here .

这是我想出的代码,但它会在我完成录制后获取文本:

import pyaudio
import json
from watson_developer_cloud import SpeechToTextV1

CHUNK = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 2
RATE = 44100
RECORD_SECONDS = 10

p = pyaudio.PyAudio()

stream = p.open(format=FORMAT,
channels=CHANNELS,
rate=RATE,
input=True,
frames_per_buffer=CHUNK)
print("* recording")

frames = []

for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
data = stream.read(CHUNK)
frames.append(data)

print("* done recording")

stream.stop_stream()
stream.close()
p.terminate()

data_feed = b''.join(frames)

speech_to_text = SpeechToTextV1(
username='secret',
password='secret too',
x_watson_learning_opt_out=False
)

result = speech_to_text.recognize(data_feed,
content_type="audio/l16;rate=44100;channels=2",
word_confidence=True,
max_alternatives=4,
word_alternatives_threshold=0.5,
model="en-US_BroadbandModel",
continuous=True)

j = json.dumps(result, indent=2)
print(j)

最佳答案

我继续从头开始创建一个程序,使用 websockets 连接到 Watson 服务器。它仍然没有完全按照我的预期进行,但它非常接近。

音频正在实时发送到服务器,但我在录制完成后收到转录本。

import asyncio
import websockets
import json
import requests
import pyaudio
import time

# Variables to use for recording audio
CHUNK = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 2
RATE = 16000

p = pyaudio.PyAudio()

# This is the language model to use to transcribe the audio
model = "en-US_BroadbandModel"

# These are the urls we will be using to communicate with Watson
default_url = "https://stream.watsonplatform.net/speech-to-text/api"
token_url = "https://stream.watsonplatform.net/authorization/api/v1/token?" \
"url=https://stream.watsonplatform.net/speech-to-text/api"
url = "wss://stream.watsonplatform.net/speech-to-text/api/v1/recognize?model=en-US_BroadbandModel"

# BlueMix app credentials
username = "" # Your Bluemix App username
password = "" # Your Bluemix App password

# Send a request to get an authorization key
r = requests.get(token_url, auth=(username, password))
auth_token = r.text
token_header = {"X-Watson-Authorization-Token": auth_token}

# Params to use for Watson API
params = {
"word_confidence": True,
"content_type": "audio/l16;rate=16000;channels=2",
"action": "start",
"interim_results": True
}

# Opens the stream to start recording from the default microphone
stream = p.open(format=FORMAT,
channels=CHANNELS,
rate=RATE,
input=True,
output=True,
frames_per_buffer=CHUNK)


async def send_audio(ws):
# Starts recording of microphone
print("* READY *")

start = time.time()
while True:
try:
print(".")
data = stream.read(CHUNK)
await ws.send(data)
if time.time() - start > 20: # Records for n seconds
await ws.send(json.dumps({'action': 'stop'}))
return False
except Exception as e:
print(e)
return False

# Stop the stream and terminate the recording
stream.stop_stream()
stream.close()
p.terminate()


async def speech_to_text():
async with websockets.connect(url, extra_headers=token_header) as conn:
# Send request to watson and waits for the listening response
send = await conn.send(json.dumps(params))
rec = await conn.recv()
print(rec)
asyncio.ensure_future(send_audio(conn))

# Keeps receiving transcript until we have the final transcript
while True:
try:
rec = await conn.recv()
parsed = json.loads(rec)
transcript = parsed["results"][0]["alternatives"][0]["transcript"]
print(transcript)
#print(parsed)
if "results" in parsed:
if len(parsed["results"]) > 0:
if "final" in parsed["results"][0]:
if parsed["results"][0]["final"]:
#conn.close()
#return False
pass
except KeyError:
conn.close()
return False

# Starts the application loop
loop = asyncio.get_event_loop()
loop.run_until_complete(speech_to_text())
loop.close()

所以我现在只想在通过麦克风录音的同时获得成绩单。

关于python - 使用 Watson for Python 进行连续实时语音转文本,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/46966787/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com