gpt4 book ai didi

python调用百度语音识别实现大音频文件语音识别功能

转载 作者:qq735679552 更新时间:2022-09-27 22:32:09 30 4
gpt4 key购买 nike

CFSDN坚持开源创造价值,我们致力于搭建一个资源共享平台,让每一个IT人在这里找到属于你的精彩世界.

这篇CFSDN的博客文章python调用百度语音识别实现大音频文件语音识别功能由作者收集整理,如果你对这篇文章有兴趣,记得点赞哟.

本文为大家分享了python实现大音频文件语音识别功能的具体代码,供大家参考,具体内容如下。

实现思路:先用ffmpeg将其他非wav格式的音频转换为wav格式,并转换音频的声道(百度支持的声道数为1)和采样率(值为8000);格式转换完成后,再用ffmpeg将音频切成百度支持的时长(30秒和60秒2种,本程序用的是30秒)。

?
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
# coding: utf-8
import json
import time
import base64
from inc import rtysdb
import urllib2
import requests
import os
import uuid
from inc import db_config
 
 
class BaiduRest:
    """Small client for the Baidu REST speech endpoints used by this script.

    Constructing an instance immediately requests an OAuth access token and
    stores it on the instance as ``token_str``; the other methods send that
    token with every request.
    """

    def __init__(self, cu_id, api_key, api_secert):
        """Store the endpoint URLs and fetch an access token.

        Args:
            cu_id: Client identifier sent as ``cuid`` with each request.
            api_key: Value substituted for ``client_id`` in the token URL.
            api_secert: Value substituted for ``client_secret`` in the
                token URL.
        """
        self.token_url = "https://openapi.baidu.com/oauth/2.0/token?grant_type=client_credentials&client_id=%s&client_secret=%s"
        self.getvoice_url = "http://tsn.baidu.com/text2audio?tex=%s&lan=zh&cuid=%s&ctp=1&tok=%s"
        self.upvoice_url = 'http://vop.baidu.com/server_api'

        self.cu_id = cu_id
        self.get_token(api_key, api_secert)

    def get_token(self, api_key, api_secert):
        """Request an access token and keep it in ``self.token_str``.

        Returns:
            True once the token has been stored.

        Raises:
            KeyError: if the JSON reply has no ``access_token`` field.
        """
        token_url = self.token_url % (api_key, api_secert)
        response = urllib2.urlopen(token_url)
        try:
            r_str = response.read()
        finally:
            # Release the connection even when reading fails.
            response.close()
        token_data = json.loads(r_str)
        self.token_str = token_data['access_token']
        return True

    # Speech synthesis
    def text2audio(self, text, filename):
        """Download synthesized speech for ``text`` and write it to ``filename``.

        Returns:
            True after the response body has been written.
        """
        get_url = self.getvoice_url % (urllib2.quote(text), self.cu_id, self.token_str)
        response = urllib2.urlopen(get_url)
        try:
            voice_data = response.read()
        finally:
            response.close()
        # The context manager closes the file even if the write raises.
        with open(filename, 'wb+') as voice_fp:
            voice_fp.write(voice_data)
        return True

    # Speech recognition
    def audio2text(self, filename):
        """Upload the audio file ``filename`` for recognition.

        The request declares the payload as ``wav``, rate 8000, one channel.

        Returns:
            The first entry of the reply's ``result`` list when the reply's
            ``err_msg`` equals ``'success.'``; otherwise False.
        """
        with open(filename, 'rb') as wav_fp:
            voice_data = wav_fp.read()

        data = {
            'format': 'wav',
            'rate': 8000,
            'channel': 1,
            'cuid': self.cu_id,
            'token': self.token_str,
            # Length of the raw audio, not of the base64 text.
            'len': len(voice_data),
            # b64encode never emits newlines; decoding to text keeps the
            # payload JSON-serializable regardless of the bytes/str type.
            'speech': base64.b64encode(voice_data).decode('ascii'),
        }
        result = requests.post(self.upvoice_url, json=data, headers={'Content-Type': 'application/json'})
        data_result = result.json()
        # .get() so that an error reply lacking 'err_msg' is reported as a
        # failed recognition instead of raising KeyError.
        if data_result.get('err_msg') == 'success.':
            return data_result['result'][0]
        return False
 
 
 
def test_voice(voice_file):
    """Run speech recognition on ``voice_file`` with a new BaiduRest client.

    Returns:
        Whatever ``BaiduRest.audio2text`` returns for the file.
    """
    # NOTE(review): the API credentials and client id are hard-coded here;
    # consider loading them from configuration instead.
    credentials = ("vossGHIgEETS6IMRxBDeahv8", "3c1fe6a6312f41fa21fa2c394dad5510")
    client = BaiduRest("0-57-7B-9F-1F-A1", *credentials)
    return client.audio2text(voice_file)
 
def get_master_audio(check_status='cut_status'):
    """Fetch rows of ``ocenter_recognition`` that still need processing.

    Args:
        check_status: ``'cut_status'`` selects rows whose ``status`` is 0;
            ``'finished_status'`` selects rows whose ``finished_status``
            is 0.

    Returns:
        The value returned by ``rtysdb.select_data`` (rows of
        ``id, url, time_long, sharps``) when it is truthy; False when the
        query yields nothing or ``check_status`` is not one of the two
        supported values.
    """
    if check_status == 'cut_status':
        sql = "SELECT id,url, time_long,sharps FROM ocenter_recognition WHERE status=0"
    elif check_status == 'finished_status':
        sql = "SELECT id,url, time_long,sharps FROM ocenter_recognition WHERE finished_status=0"
    else:
        # Unknown selector: nothing to query.
        return False
    data = rtysdb.select_data(sql, 'more')
    return data if data else False
 
 
def go_recognize(master_id):
    """Recognize pending sections of one recording and store the text.

    Reads up to ten ``ocenter_section`` rows with ``status=0`` for
    ``master_id`` (ordered by id).  For every row whose audio file exists
    under ``db_config.SYS_PATH`` the file is sent to ``test_voice``; the
    section row is then updated and, on success, the recognized text is
    appended to the parent ``ocenter_recognition.content``.  After the batch
    the parent row is marked ``finished_status=1``.

    Args:
        master_id: Integer id of the parent ``ocenter_recognition`` row.

    Returns:
        False when there are no pending section rows; otherwise None.
    """
    section_path = db_config.SYS_PATH
    sql = "SELECT id,rid,url,status FROM ocenter_section WHERE rid=%d AND status=0 order by id asc limit 10" % (master_id)
    record = rtysdb.select_data(sql, 'more')
    if not record:
        return False

    for rec in record:
        # rec is (id, rid, url, status).
        voice_file = section_path + '/' + rec[2]
        if not os.path.exists(voice_file):
            # NOTE(review): the row keeps status=0, so it is selected again
            # on every later run -- confirm this is intended.
            continue

        # The original printed the result and called exit(0) here (debug
        # leftovers), which stopped the program before anything was saved.
        result = test_voice(voice_file)
        if result:
            # NOTE(review): the SQL below is built by string interpolation;
            # recognized text containing a quote will break the statement.
            # Switch to a parameterized call if rtysdb offers one.
            sql = "update ocenter_section set content='%s', status='%d' where id=%d" % (result, 1, rec[0])
            rtysdb.do_exec_sql(sql)
            parent_content = rtysdb.select_data("SELECT id,content FROM ocenter_recognition WHERE id=%d" % (rec[1]))
            if parent_content:
                # Treat an empty/NULL parent content as the empty string.
                new_content = (parent_content[1] or '') + result
                update_content_sql = "update ocenter_recognition set content='%s' where id=%d" % (new_content, rec[1])
                rtysdb.do_exec_sql(update_content_sql)
        else:
            # The original passed three values to two placeholders, which
            # raises TypeError.  Presumably the intent is to mark the
            # section as processed (status 1) -- TODO confirm.
            rtysdb.do_exec_sql("update ocenter_section set status='%d' where id=%d" % (1, rec[0]))
        # Pause between requests, as the original did.
        time.sleep(5)

    # The original used ``for ... else``; with no ``break`` in the loop that
    # is the same as running this unconditionally after the loop.
    # NOTE(review): only ten sections are handled per call, yet the parent
    # is marked finished here -- verify against the intended workflow.
    rtysdb.do_exec_sql("UPDATE ocenter_recognition SET finished_status=1 WHERE id=%d" % (master_id))
def ffmpeg_convert():
    """Convert pending recordings to 8000 Hz mono wav and cut them up.

    For every row returned by ``get_master_audio('cut_status')`` the source
    file under ``db_config.SYS_PATH`` is converted with ffmpeg into
    ``Uploads/Convert/convert_<uuid>.wav``.  When the converted file exists
    it is passed to ``ffmpeg_cut`` and the row is updated with ``status=1``
    and the converted file name.

    Raises:
        OSError: if the ``ffmpeg`` executable cannot be started.
    """
    # Local import so this function does not depend on a file-level import.
    import subprocess

    section_path = db_config.SYS_PATH
    used_audio = get_master_audio('cut_status')
    if not used_audio:
        return

    for audio in used_audio:
        # audio is (id, url, time_long, sharps).
        audio_path = section_path + '/' + audio[1]
        convert_name = "Uploads/Convert/convert_" + str(uuid.uuid1()) + ".wav"
        target_path = section_path + "/" + convert_name

        # subprocess.call waits for ffmpeg to finish, so the existence check
        # below sees the final result (os.popen returned immediately).  The
        # argument list also avoids shell parsing of paths from the database.
        subprocess.call([
            "ffmpeg", "-i", audio_path,
            "-ar", "8000", "-ac", "1", "-f", "wav",
            target_path,
        ])

        if os.path.exists(target_path):
            ffmpeg_cut(convert_name, audio[3], audio[0])
            sql = "UPDATE ocenter_recognition SET status=1,convert_name='%s' where id=%d" % (convert_name, audio[0])
            rtysdb.do_exec_sql(sql)
def ffmpeg_cut(convert_name, sharps, master_id):
    """Cut a converted wav file into consecutive 30-second sections.

    Each section is written to ``Uploads/Section/<uuid>-<n>.wav`` under
    ``db_config.SYS_PATH`` and recorded as a new ``ocenter_section`` row
    with ``status`` 0.

    Args:
        convert_name: Path of the converted file, relative to
            ``db_config.SYS_PATH``.
        sharps: Number of 30-second sections to produce; nothing is done
            when it is empty or not positive.
        master_id: Stored as ``rid`` on every inserted section row.

    Raises:
        OSError: if the ``ffmpeg`` executable cannot be started.
    """
    # Local import so this function does not depend on a file-level import.
    import subprocess

    if not sharps or sharps <= 0:
        return

    section_path = db_config.SYS_PATH
    cut_name = section_path + '/' + convert_name

    for i in range(sharps):
        # Start offset as HH:MM:SS, computed arithmetically.  The original
        # went through time.localtime() and subtracted 8 hours, which is
        # only correct in a UTC+8 timezone and below ten hours.
        minutes, seconds = divmod(i * 30, 60)
        hours, minutes = divmod(minutes, 60)
        start_time = "%02d:%02d:%02d" % (hours, minutes, seconds)

        db_store_name = "Uploads/Section/" + str(uuid.uuid1()) + '-' + str(i + 1) + ".wav"
        section_name = section_path + "/" + db_store_name

        # Wait for ffmpeg to finish before recording the section; the
        # argument list avoids shell parsing of the file names.
        subprocess.call([
            "ffmpeg", "-i", cut_name,
            "-vn", "-acodec", "copy",
            "-ss", start_time, "-t", "00:00:30",
            section_name,
        ])

        data = {
            'rid': master_id,
            'url': db_store_name,
            'create_time': int(time.time()),
            'status': 0,
        }
        rtysdb.insert_one('ocenter_section', data)
 
if __name__ == "__main__":
    # First convert and cut every recording that has not been cut yet ...
    ffmpeg_convert()
    # ... then run recognition for every recording not yet marked finished.
    audio = get_master_audio('finished_status')
    if audio:
        for ad in audio:
            # ad[0] is the id column of the ocenter_recognition row.
            go_recognize(ad[0])

以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持我.

原文链接:https://blog.csdn.net/septwolves2015/article/details/78554524 。

最后此篇关于python调用百度语音识别实现大音频文件语音识别功能的文章就讲到这里了,如果你想了解更多关于python调用百度语音识别实现大音频文件语音识别功能的内容请搜索CFSDN的文章或继续浏览相关文章,希望大家以后支持我的博客! 。

30 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com