gpt4 book ai didi

c - 使用 PortAudio 获取 flite 输出音频

转载 作者:太空狗 更新时间:2023-10-29 16:47:40 28 4
gpt4 key购买 nike

我正在尝试获取 flite speech synthesis library在我的 Mac 上工作,但 flite 库不支持我的声音架构。为了解决这个问题,我正在使用 PortAudio播放合成音频;所以我不得不在 audio.c 文件中进行一些修改,让 flite 能够使用该库。在 GNU AutoTools 中折腾了一段时间后,我设法让所有的东西都编译得很好,但随后我运行了程序并得到了这个输出:

$ ./flite -t "test"
frameIndex: 0
maxFrameIndex: 0
numChannels: 1
numSamples: 7225
sampleRate: 8000
=== Now playing back. ===
Waiting for playback to finish.
frameIndex in callback: -2008986336
maxFrameIndex in callback: 32655
numChannels in callback: 152579008
numSamples in callback: 0
sampleRate in callback: 0
Segmentation fault: 11

$ ./flite -t "test"
frameIndex: 0
maxFrameIndex: 0
numChannels: 1
numSamples: 7225
sampleRate: 8000
=== Now playing back. ===
Waiting for playback to finish.
frameIndex in callback: -71217888
maxFrameIndex in callback: 32712
numChannels in callback: 232979392
numSamples in callback: 0
sampleRate in callback: 0
Segmentation fault: 11

这是 audio.c 文件中的相关代码,当我提供命令行参数 -t 时会调用它。经过一些调试后,我在 playCallback() 函数中标记了出现段错误的感兴趣区域。

static int playCallback( const void *inputBuffer, void *outputBuffer,
unsigned long framesPerBuffer,
const PaStreamCallbackTimeInfo* timeInfo,
PaStreamCallbackFlags statusFlags,
void *userData )
{
cst_wave *data = (cst_wave*)userData;
short *rptr = &data->samples[data->frameIndex * data->num_channels];
short *wptr = (short*)outputBuffer;
unsigned int i;
int finished;
unsigned int framesLeft = cst_wave_maxFrameIndex(data) - cst_wave_frameIndex(data);

(void) inputBuffer; /* Prevent unused variable warnings. */
(void) timeInfo;
(void) statusFlags;
(void) userData;

printf("frameIndex in callback: %d\n", cst_wave_frameIndex(data));
printf("maxFrameIndex in callback: %d\n", cst_wave_maxFrameIndex(data));
printf("numChannels in callback: %d\n", cst_wave_num_channels(data));
printf("numSamples in callback: %d\n", cst_wave_num_samples(data));
printf("sampleRate in callback: %d\n\n", cst_wave_sample_rate(data));

if( framesLeft < framesPerBuffer )
{
/* final buffer... */
for( i=0; i<framesLeft; i++ )
{
*wptr++ = *rptr++; /* left */
if( cst_wave_num_channels(data) == 2 ) *wptr++ = *rptr++; /* right */
}
for( ; i<framesPerBuffer; i++ )
{
*wptr++ = 0; /* left */
if( cst_wave_num_channels(data) == 2) *wptr++ = 0; /* right */
}
data->frameIndex += framesLeft;
finished = paComplete;
}
else
{
for( i=0; i<framesPerBuffer; i++ )
{
*wptr++ = *rptr++; /* left */
if( cst_wave_num_channels(data) == 2 ) *wptr++ = *rptr++; /* right */
}
cst_wave_set_frameIndex(data, framesPerBuffer);
finished = paContinue;
}
return finished;
}

int play_wave(cst_wave *w)
{
PaStream* stream;
PaStreamParameters outputParameters;
cst_wave_set_frameIndex(w, 0);
cst_wave_set_maxFrameIndex(w, (cst_wave_num_samples(w) / cst_wave_sample_rate(w)) * cst_wave_num_channels(w) * sizeof(short));
int err = 0;
err = Pa_Initialize();
outputParameters.device = Pa_GetDefaultOutputDevice();
if (outputParameters.device == paNoDevice)
{
fprintf(stderr,"Error: No default output device.\n");
return -5;
}
printf("frameIndex: %d\n", cst_wave_frameIndex(w));
printf("maxFrameIndex: %d\n", cst_wave_maxFrameIndex(w));
printf("numChannels: %d\n", cst_wave_num_channels(w));
printf("numSamples: %d\n", cst_wave_num_samples(w));
printf("sampleRate: %d\n", cst_wave_sample_rate(w));

outputParameters.channelCount = cst_wave_num_channels(w);
outputParameters.sampleFormat = paInt16;
outputParameters.suggestedLatency = Pa_GetDeviceInfo( outputParameters.device )->defaultLowOutputLatency;
outputParameters.hostApiSpecificStreamInfo = NULL;
puts("=== Now playing back. ===");
err = Pa_OpenStream(&stream,
NULL, /* no input */
&outputParameters,
cst_wave_sample_rate(w),
512,
paClipOff,
playCallback,
&w);
if( stream )
{
err = Pa_StartStream( stream );
if( err != paNoError ) goto done;

puts("Waiting for playback to finish.");

while((err = Pa_IsStreamActive(stream)) == 1) Pa_Sleep(100);
if( err < 0 ) goto done;

err = Pa_CloseStream( stream );
if( err != paNoError ) goto done;

puts("Done.");
}
done:
Pa_Terminate();
free(cst_wave_samples(w));
}

因为它是相关的,我还稍微修改了 cst_wave.h 中的 cst_wave 结构,使其包含我必须存储的数据,并添加了一些#defines 到已经存在的那些:

typedef struct  cst_wave_struct {
const char *type;
int frameIndex;
int maxFrameIndex;
int sample_rate;
int num_samples;
int num_channels;
short *samples;
} cst_wave;

#define cst_wave_num_samples(w) (w?w->num_samples:0)
#define cst_wave_num_channels(w) (w?w->num_channels:0)
#define cst_wave_sample_rate(w) (w?w->sample_rate:0)
#define cst_wave_samples(w) (w->samples)
#define cst_wave_frameIndex(w) (w->frameIndex)
#define cst_wave_maxFrameIndex(w) (w->maxFrameIndex)

#define cst_wave_set_num_samples(w,s) w->num_samples=s
#define cst_wave_set_num_channels(w,s) w->num_channels=s
#define cst_wave_set_sample_rate(w,s) w->sample_rate=s
#define cst_wave_set_frameIndex(w,s) w->frameIndex=s
#define cst_wave_set_maxFrameIndex(w,s) w->maxFrameIndex=s

更新 1:

按照@Rohan 的建议现在给我这个输出:

$ ./bin/flite -t "test"
frameIndex: 0
maxFrameIndex: 0
numChannels: 1
numSamples: 7225
sampleRate: 8000
=== Now playing back. ===
Waiting for playback to finish.
frameIndex in callback: 0
maxFrameIndex in callback: 0
numChannels in callback: 1
numSamples in callback: 7225
sampleRate in callback: 8000

Done.
flite(68929,0x7fff71c0d310) malloc: *** error for object 0x7fd6e2809800: pointer being freed was not allocated
*** set a breakpoint in malloc_error_break to debug
Abort trap: 6

为了解决这个问题,我删除了 free(cst_wave_samples(w));。现在程序正常执行,没有明显的错误,但我的 Mac 上仍然没有音频输出。有什么建议吗?

最佳答案

在我看来问题可能出在其他地方。

总而言之,您添加评论的例程真的很微不足道。它基本上只是将一个充满数据的缓冲区从一个地方复制到另一个地方,如果数据没有填满输入缓冲区,则用零填充其余部分。如果我正在编写代码,我可能会按照这些一般思路做更多的事情:

const unsigned frame_size = sizeof(short) * data->num_channels;    

char *source = &data->samples[data->frameIndex * data->num_channels];
char *dest = outputBuffer;

unsigned framesLeft = data->maxFrameIndex - data->frameIndex;
unsigned framesEmpty = framesPerBuffer - framesLeft;

memcpy(source, dest, framesLeft * frame_size);
memset(dest+framesLeft * frame_size, 0, framesEmpty * frame_size);

data->frameIndex += framesPerBuffer;

尽管写得相当笨拙,问题中的 if/else 只是跳过了 memset 部分,如果需要的话被填充为零。

因此,这会将一个充满数据的缓冲区从一个地方复制到另一个地方,并用零填充任何剩余部分。如果您遇到段错误,则显然分配目标缓冲区的任何内容都没有在那里分配足够的空间。如果不做一些观察,就不可能猜测分配是否发生在 Pa_InitializePa_OpenStreamPa_StartStream 或其他地方——而且很可能您不太关心实际执行分配的代码,而是计算要分配多少空间的代码(可能在上述代码之一中,或者完全在其他地方)。

关于c - 使用 PortAudio 获取 flite 输出音频,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/23820713/

28 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com