gpt4 book ai didi

python - 在 AWS Transcribe 中实时获取 BadRequestException

转载 作者:行者123 更新时间:2023-12-05 07:03:46 25 4
gpt4 key购买 nike

我从亚马逊流媒体 API 收到此响应。谁能帮我解决我在这里做错了什么。

b'\x00\x00\x00\xa3\x00\x00\x00ah\x10k\xe1\x0f:异常类型\x07\x00\x13BadRequestException\r:内容类型\x07\x00\x10application/json\r:message-type\x07\x00\texception{"Message":"收到意外的 WebSocket 帧。"}\xbd\xceK\x8a'

: 消息类型异常{"消息":"收到意外的 WebSocket 帧。"}½ÈK

我正在使用下面的代码

导入所有库

import asyncio
import websockets
import json
import sys, os, base64, datetime, hashlib, hmac, urllib
import pyaudio
import struct
import numpy as np
import wave
import argparse
import tempfile
import queue
import sys
import sounddevice as sd
import soundfile as sf
import numpy # Make sure NumPy is loaded before it is used in the callback
assert numpy # avoid "imported but unused" message (W0611)

使用 Soundevice 流式传输音频的代码

def int_or_str(text):
"""Helper function for argument parsing."""
try:
return int(text)
except ValueError:
return text


parser = argparse.ArgumentParser(add_help=False)
parser.add_argument('-l', '--list-devices', action='store_true', help='show list of audio devices and exit')
args, remaining = parser.parse_known_args()
if args.list_devices:
print(sd.query_devices())
parser.exit(0)
parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter, parents=[parser])
parser.add_argument('filename', nargs='?', metavar='FILENAME',help='audio file to store recording to')
parser.add_argument('-d', '--device', type=int_or_str,help='input device (numeric ID or substring)')
parser.add_argument('-r', '--samplerate', type=int, help='sampling rate')
parser.add_argument('-c', '--channels', type=int, default=1, help='number of input channels')
parser.add_argument('-t', '--subtype', type=str, help='sound file subtype (e.g. "PCM_24")')
args = parser.parse_args(remaining)

q = queue.Queue()



def callback(indata, frames, time, status):
"""This is called (from a separate thread) for each audio block."""
if status:
print(status, file=sys.stderr)
q.put(indata.copy())

根据 amazon transcribe 的要求创建用于连接的 URl

def createPresignedUrl(data):
method = 'GET'
service = 'transcribe'
region = data['region']
host = 'transcribestreaming.' + region + '.amazonaws.com:8443'
endpoint = "wss://" + host

def sign(key, msg):
return hmac.new(key, msg.encode('utf-8'), hashlib.sha256).digest()

def getSignatureKey(key, dateStamp, regionName, serviceName):
kDate = sign(('AWS4' + key).encode('utf-8'), dateStamp)
kRegion = sign(kDate, regionName)
kService = sign(kRegion, serviceName)
kSigning = sign(kService, 'aws4_request')
return kSigning

access_key = data['key']
secret_key = data['secret']
if access_key == '' or secret_key == '':
print('No access key is available.')
sys.exit()

# Create a date for headers and the credential string
t = datetime.datetime.utcnow()
amz_date = t.strftime('%Y%m%dT%H%M%SZ') # Format date as YYYYMMDD'T'HHMMSS'Z'
datestamp = t.strftime('%Y%m%d') # Date w/o time, used in credential scope

canonical_uri = '/stream-transcription-websocket'

canonical_headers = 'host:' + host + '\n'
signed_headers = 'host'

algorithm = 'AWS4-HMAC-SHA256'
credential_scope = datestamp + '/' + region + '/' + service + '/' + 'aws4_request'

canonical_querystring = 'X-Amz-Algorithm=AWS4-HMAC-SHA256'
canonical_querystring += '&X-Amz-Credential=' + urllib.parse.quote_plus(access_key + '/' + credential_scope)
canonical_querystring += '&X-Amz-Date=' + amz_date
canonical_querystring += '&X-Amz-Expires=300'
canonical_querystring += '&X-Amz-SignedHeaders=' + signed_headers
canonical_querystring += '&language-code=' + data['languageCode']
canonical_querystring += '&media-encoding=pcm'
canonical_querystring += '&sample-rate=' + str(data['sampleRate'])

payload_hash = hashlib.sha256(('').encode('utf-8')).hexdigest()

# Step 6: Combine elements to create canonical request
canonical_request = method + '\n' + canonical_uri + '\n' + canonical_querystring + '\n' + canonical_headers + '\n' + signed_headers + '\n' + payload_hash

string_to_sign = algorithm + '\n' + amz_date + '\n' + credential_scope + '\n' + hashlib.sha256(canonical_request.encode('utf-8')).hexdigest()

# Create the signing key
signing_key = getSignatureKey(secret_key, datestamp, region, service)

# Sign the string_to_sign using the signing_key
signature = hmac.new(signing_key, (string_to_sign).encode("utf-8"), hashlib.sha256).hexdigest()

canonical_querystring += '&X-Amz-Signature=' + signature

request_url = endpoint + canonical_uri + "?" + canonical_querystring

return request_url

data = {
'key': 'Add your key',
'secret': 'Add your secret key',
'region': 'us-east-1',
'languageCode': 'en-US',
'sampleRate': 44100
}

PCM编码代码

url = createPresignedUrl(data)

# FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 44100
CHUNK = 16000
frames = []

# stream = audio.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK)

def pcmEncode(in_data):

offset = 0
input_len = len(in_data)
buffer = []

for i in range(input_len):
offset += 2
s = max(-1, min(1, in_data[i]))
b = (s * 32768) if (s < 0) else (s * 32767)
buffer.insert(offset, b)
buffer = np.array(buffer)
return buffer

def downsampleBuffer(buffer, outputSampleRate = 16000) :
if outputSampleRate == RATE :
return buffer
sampleRateRatio = RATE / outputSampleRate
newLength = round( len(buffer) / sampleRateRatio )
result = []
offsetResult = 0
offsetBuffer = 0

while offsetResult < newLength :
nextOffsetBuffer = round((offsetResult + 1) * sampleRateRatio)
accum = 0
count = 0

len_buffer = nextOffsetBuffer if nextOffsetBuffer < len(buffer) else len(buffer)

for i in range( offsetBuffer, len_buffer):
accum += buffer[i]
count += 1

result.append(accum / count)
offsetResult += 1
offsetBuffer = nextOffsetBuffer


return result

与 AWS 建立连接以进行转录

async def start_stream():

try:
connection = websockets.connect(url)
stream = sd.Stream(samplerate=args.samplerate, blocksize=None, device=args.device,
channels=args.channels, dtype=None, latency=None, extra_settings=None,
callback=None, finished_callback=None, clip_off=None, dither_off=None,
never_drop_input=None, prime_output_buffers_using_stream_callback=None)
stream.start()
while True:

a = stream.read(200)
buffer = downsampleBuffer(a[0])
result = pcmEncode(buffer)
async with connection as ws:
sent_data = {
"headers": {
":message-type": {"type": "string", "value": "event"},
":event-type": {"type": "string", "value": "AudioEvent"},
":content-type": {"type": "string", "value": "application/octet-stream"}
},
"body": str(result)
}
await ws.send(json.dumps(sent_data))
response = await ws.recv()
print(response)
print(response.decode('latin1'))

except KeyboardInterrupt:

parser.exit('\nInterrupted by user')

except Exception as e:

parser.exit(type(e).__name__ + ': ' + str(e))

if status:
parser.exit('Error during playback: ' + str(status))

asyncio.get_event_loop().run_until_complete(start_stream())
stream.stop()
stream.close()

最佳答案

我现在相信 BadRequestException 指的是帧编码不正确,而不是音频数据错误。我发现您的代码存在一些问题:

  1. 您需要以特殊方式对 header /正文进行编码:https://docs.aws.amazon.com/transcribe/latest/dg/event-stream.html

  2. 您需要非常熟练地处理您发送的缓冲区。音频需要是 16 位/无符号 (int)/little-endian ( See here )。现在您只是将 float (您的麦克风数据是 float 吗??)转换为 16 位友好,但将其存储在位大小由系统决定的缓冲区中(可能是 32 位或 64 位)并且您正在使用 JSON 字符串编码器对其进行编码。之后不太可能是正确的格式。基本上,您需要一个缓冲区库,它可以让您设置具有指定位大小 (16) 和字节序值(小)的 int。例如,这是我的飞镖代码:

for (var i=0; i<audioChunk.length; i++) {
messageBytes.setInt16(offset, audioChunk[i], Endian.little);
offset += 2;
}
  1. 当您使用上面提到的 EventStream header 编码时,您必须同样小心那些 32 位长度的整数是 BIG endian。同样的规则适用。您需要以允许您指定位大小和字节顺序的方式写入字节缓冲区。

继续的最佳方法是编写 AWS 响应无论如何都需要的解码函数,然后解码您的 编码帧并查看结果是否相同。为音频使用测试数据,例如 [-32000, -100, 0, 200 31000] 或类似的东西,这样您就可以测试字节顺序等是否全部正确。

关于python - 在 AWS Transcribe 中实时获取 BadRequestException,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/63137516/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com