gpt4 book ai didi

梅尔倒谱系数(MFCC)实现

转载 作者:qq735679552 更新时间:2022-09-28 22:32:09 36 4
gpt4 key购买 nike

CFSDN坚持开源创造价值,我们致力于搭建一个资源共享平台,让每一个IT人在这里找到属于你的精彩世界.

这篇CFSDN的博客文章梅尔倒谱系数(MFCC)实现由作者收集整理,如果你对这篇文章有兴趣,记得点赞哟.

本文通过实例为大家分享梅尔倒谱系数(MFCC)的实现代码,供大家参考,具体内容如下。

?
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
"""
@author: zoutai
@file: mymfcc.py
@time: 2018/03/26
@description:
"""
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy
import numpy as np
import scipy.io.wavfile
from matplotlib.colors import BoundaryNorm
from scipy.fftpack import dct
 
 
# Step 1 - read the audio and set up the time-domain figure (time vs. amplitude).
sample_rate, signal = scipy.io.wavfile.read('osr_us_000_0010_8k.wav')  # file assumed to be in the same directory
signal = signal[0:int(3.5 * sample_rate)]  # keep only the first 3.5 seconds
# Time stamp, in seconds, of every retained sample.
time = np.arange(0, len(signal)) * (1.0 / sample_rate)
# plt.plot(time, signal)
# NOTE(review): the plot call above is disabled while the labelling calls
# below stay active, so they decorate an otherwise empty axes - confirm intent.
plt.xlabel("time(s)")
plt.ylabel("amplitude")
plt.title("signal in the time domain ")
# Matplotlib documents this argument as a bool; a string such as 'on' is only
# "true" because it is non-empty (and so would 'off' be), so pass True.
plt.grid(True)
 
 
# Step 2 - pre-emphasis.
# Apply the first-order difference filter y(t) = x(t) - alpha * x(t-1), where
# alpha is `pre_emphasis`. The first sample has no predecessor and is kept
# unchanged at the head of the result.

pre_emphasis = 0.97
first_sample = numpy.ravel(signal[0])
differenced = numpy.ravel(signal[1:] - pre_emphasis * signal[:-1])
emphasized_signal = numpy.concatenate((first_sample, differenced))


# Time stamp, in seconds, of every sample of the pre-emphasized signal.
seconds_per_sample = 1.0 / sample_rate
time = np.arange(0, len(emphasized_signal)) * seconds_per_sample
# plt.plot(time, emphasized_signal)
# plt.xlabel("time(s)")
# plt.ylabel("amplitude")
# plt.title("signal in the time domain after pre-emphasis")
# plt.grid(True)
 
 
# Step 3 - framing: cut the signal into overlapping, fixed-length frames.
frame_size = 0.025  # frame length, in seconds
frame_stride = 0.01  # hop between consecutive frame starts, in seconds

# frame_length - samples per frame, frame_step - samples per hop
frame_length, frame_step = frame_size * sample_rate, frame_stride * sample_rate  # convert from seconds to samples
signal_length = len(emphasized_signal)  # total number of samples

frame_length = int(round(frame_length))
frame_step = int(round(frame_step))

# Total number of frames: one frame starting at sample 0, plus one more for
# every hop needed to reach the end of the signal. Counting the first frame
# explicitly guarantees at least one frame (also for signals shorter than a
# frame) and that the last, partial frame is kept instead of being dropped.
num_frames = 1 + int(numpy.ceil(max(signal_length - frame_length, 0) / frame_step))

# The last frame starts at (num_frames - 1) * frame_step; zero-pad the signal
# up to the end of that frame so every frame has the same number of samples
# and no sample of the original signal is truncated.
pad_signal_length = (num_frames - 1) * frame_step + frame_length
z = numpy.zeros(pad_signal_length - signal_length)
pad_signal = numpy.append(emphasized_signal, z)

# Build a (num_frames, frame_length) matrix of sample indices by adding
#   - the within-frame offsets 0 .. frame_length - 1, repeated for every row, and
#   - the frame start offsets 0, frame_step, 2 * frame_step, ..., one per row.
# E.g. with frame_length = 200 and frame_step = 80, row 0 holds 0..199 and
# row 1 holds 80..279.
indices = (
    numpy.tile(numpy.arange(0, frame_length), (num_frames, 1))
    + numpy.tile(numpy.arange(0, num_frames * frame_step, frame_step), (frame_length, 1)).T
)
# Fancy indexing gathers the samples; astype(copy=False) avoids a copy when
# the index array already has dtype int32.
frames = pad_signal[indices.astype(numpy.int32, copy=False)]
# frames has shape (num_frames, frame_length): rows are frames (time), columns
# are the samples inside one frame, values are signal amplitudes.

# plt.matshow(frames, cmap='hot')
# plt.colorbar()
# plt.figure()
# plt.pcolormesh(frames)
 
 
# Step 4 - apply a Hamming window to every frame.
# The Fourier transform implicitly treats each frame as exactly one period of
# a periodic signal, i.e. it assumes the two frame ends join up continuously.
# Real frames do not, so a single-frequency signal would be reported as a
# mixture of several frequencies (spectral leakage). Tapering the frame ends
# with a window reduces this effect.

frames *= numpy.hamming(frame_length)  # element-wise; the window is broadcast across all frames
# frames *= 0.54 - 0.46 * numpy.cos((2 * numpy.pi * n) / (frame_length - 1))  # explicit implementation **

# plt.pcolormesh(frames)
 
 
# Step 5 - Fourier transform: magnitude spectrum and power spectrum.

# rfft zero-pads every frame to nfft samples when the frame is shorter and
# returns the nfft // 2 + 1 non-negative-frequency bins per frame.
nfft = 512
mag_frames = numpy.absolute(numpy.fft.rfft(frames, nfft))  # magnitude of the fft
pow_frames = ((1.0 / nfft) * ((mag_frames) ** 2))  # power spectrum


# plt.pcolormesh(mag_frames)
#
# plt.pcolormesh(pow_frames)
 
 
# Step 6 - mel filter banks.
# Conversions used: m = 2595 * log10(1 + f / 700);  f = 700 * (10^(m / 2595) - 1)
nfilt = 40  # number of triangular filters
low_freq_mel = 0
high_freq_mel = (2595 * numpy.log10(1 + (sample_rate / 2) / 700))  # convert hz to mel
mel_points = numpy.linspace(low_freq_mel, high_freq_mel, nfilt + 2)  # equally spaced in mel scale
hz_points = (700 * (10 ** (mel_points / 2595) - 1))  # convert mel to hz
# FFT bin index of every filter edge/center (named fft_bins rather than `bin`
# so the builtin bin() is not shadowed).
fft_bins = numpy.floor((nfft + 1) * hz_points / sample_rate)

# One row per filter, one column per rfft bin.
fbank = numpy.zeros((nfilt, nfft // 2 + 1))
for m in range(1, nfilt + 1):
    f_m_minus = int(fft_bins[m - 1])  # left
    f_m = int(fft_bins[m])  # center
    f_m_plus = int(fft_bins[m + 1])  # right

    # Rising edge of the triangle: 0 at the left bin up to 1 at the center.
    for k in range(f_m_minus, f_m):
        fbank[m - 1, k] = (k - fft_bins[m - 1]) / (fft_bins[m] - fft_bins[m - 1])
    # Falling edge of the triangle: 1 at the center down to 0 at the right bin.
    for k in range(f_m, f_m_plus):
        fbank[m - 1, k] = (fft_bins[m + 1] - k) / (fft_bins[m + 1] - fft_bins[m])
filter_banks = numpy.dot(pow_frames, fbank.T)
filter_banks = numpy.where(filter_banks == 0, numpy.finfo(float).eps, filter_banks)  # numerical stability: avoid log10(0)
# NOTE(review): 20 * log10 is applied to power values here; 10 * log10 is the
# usual power-to-dB conversion - confirm which scaling is intended.
filter_banks = 20 * numpy.log10(filter_banks)  # dB; shape (number of frames, nfilt)

# plt.subplot(111)
# plt.pcolormesh(filter_banks.T)
# plt.grid(True)
# plt.ylabel('frequency [hz]')
# plt.xlabel('time [sec]')
# plt.show()
 
 
#
# Step 7 - mel-frequency cepstral coefficients (MFCCs).
num_ceps = 12  # number of cepstral coefficients to keep
cep_lifter = 22  # parameter of the sinusoidal lifter applied below
# Type-2 orthonormal DCT along the filter axis; column 0 is discarded and the
# next num_ceps columns are kept.
mfcc = dct(filter_banks, type=2, axis=1, norm='ortho')[:, 1:(num_ceps + 1)]  # keep 2-13
(nframes, ncoeff) = mfcc.shape
n = numpy.arange(ncoeff)
# Sinusoidal liftering: one weight per coefficient index, broadcast over frames.
lift = 1 + (cep_lifter / 2) * numpy.sin(numpy.pi * n / cep_lifter)
mfcc *= lift

# plt.pcolormesh(mfcc.T)
# plt.ylabel('frequency [hz]')
# plt.xlabel('time [sec]')
 
 
# Step 8 - mean normalization.
# to balance the spectrum and improve the signal-to-noise (snr), we can simply subtract the mean of each coefficient from all frames.

# axis=0 averages over frames, giving one mean per filter / per coefficient;
# the subtraction is done in place and also removes a constant 1e-8.
filter_banks -= (numpy.mean(filter_banks, axis=0) + 1e-8)
mfcc -= (numpy.mean(mfcc, axis=0) + 1e-8)

# plt.subplot(111)
# plt.pcolormesh(mfcc.T)
# plt.ylabel('frequency [hz]')
# plt.xlabel('time [sec]')
# plt.show()
 
 
# Direct spectrum analysis
# plot the wave
# plt.specgram(signal, Fs=sample_rate, scale_by_freq=True, sides='default')
# plt.ylabel('frequency(hz)')
# plt.xlabel('time(s)')
# plt.show()



# Mel spectrogram computed by librosa, shown in its own figure.
plt.figure(figsize=(10, 4))
# librosa expects floating-point audio passed by keyword, while
# scipy.io.wavfile.read returns integer PCM samples for typical WAV files, so
# convert first. Use the file's real sample rate instead of a hard-coded 8000.
mfccs = librosa.feature.melspectrogram(
    y=signal.astype(numpy.float32), sr=sample_rate, n_fft=512, n_mels=40
)
# specshow needs the sample rate to label the time axis correctly.
librosa.display.specshow(mfccs, sr=sample_rate, x_axis='time')
plt.colorbar()
# NOTE(review): the figure is titled 'mfcc' but shows librosa's mel
# spectrogram, not the `mfcc` array computed earlier in this script - confirm.
plt.title('mfcc')
plt.tight_layout()
plt.show()

以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持我.

原文链接:https://blog.csdn.net/SoundSlow/article/details/79711227 。

关于梅尔倒谱系数(MFCC)实现的这篇文章就讲到这里了。如果你想了解更多关于梅尔倒谱系数(MFCC)实现的内容,请搜索CFSDN的文章或继续浏览相关文章,希望大家以后支持我的博客!

36 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com