gpt4 book ai didi

c++ - Espeak SAPI/dll 在 Windows 上的使用?

转载 作者:塔克拉玛干 更新时间:2023-11-03 00:42:05 24 4
gpt4 key购买 nike

问题:我正在尝试使用 espeak 文本转语音引擎。所以我让它在 linux 上运行得很好(下面的代码)。现在我也想将这个基本程序移植到 Windows,但这几乎是不可能的...

部分问题是 Windows dll 只允许 AUDIO_OUTPUT_SYNCHRONOUS,这意味着它需要一个回调,但我无法弄清楚如何从回调中播放音频...首先它崩溃了,然后我意识到,我需要一个回调函数,现在我在回调函数中获取了数据,但我不知道如何播放它...因为它既不是 wav 文件,也不像在 Linux 上那样自动播放。

sourceforge 网站没什么用,因为它基本上说使用 SAPI 版本,但是没有关于如何使用 sapi espeak dll 的示例...

无论如何,这是我的代码,有人可以帮忙吗?

#ifdef __cplusplus
#include <cstdio>
#include <cstdlib>
#include <cstring>
#else
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#endif

#include <assert.h>
#include <ctype.h>

//#include "speak_lib.h"
#include "espeak/speak_lib.h"

// libespeak-dev: /usr/include/espeak/speak_lib.h
// apt-get install libespeak-dev
// apt-get install libportaudio-dev

// g++ -o mine mine.cpp -lespeak
// g++ -o mine mine.cpp -I/usr/include/espeak/ -lespeak
// gcc -o mine mine.cpp -I/usr/include/espeak/ -lespeak


char voicename[40];
int samplerate;
int quiet = 0;
static char genders[4] = {' ','M','F',' '};

//const char *data_path = "/usr/share/"; // /usr/share/espeak-data/
const char *data_path = NULL; // use default path for espeak-data


int strrcmp(const char *s, const char *sub)
{
int slen = strlen(s);
int sublen = strlen(sub);
return memcmp(s + slen - sublen, sub, sublen);
}


char * strrcpy(char *dest, const char *source)
{
// Pre assertions
assert(dest != NULL);
assert(source != NULL);
assert(dest != source);

// tk: parentheses
while((*dest++ = *source++))
;
return(--dest);
}

const char* GetLanguageVoiceName(const char* pszShortSign)
{
#define LANGUAGE_LENGTH 30
static char szReturnValue[LANGUAGE_LENGTH] ;
memset(szReturnValue, 0, LANGUAGE_LENGTH);

for (int i = 0; pszShortSign[i] != '\0'; ++i)
szReturnValue[i] = (char) tolower(pszShortSign[i]);

const espeak_VOICE **voices;
espeak_VOICE voice_select;
voices = espeak_ListVoices(NULL);

const espeak_VOICE *v;
for(int ix=0; (v = voices[ix]) != NULL; ix++)
{
if( !strrcmp( v->languages, szReturnValue) )
{
strcpy(szReturnValue, v->name);
return szReturnValue;
}
} // End for

strcpy(szReturnValue, "default");
return szReturnValue;
} // End function getvoicename


void ListVoices()
{
const espeak_VOICE **voices;
espeak_VOICE voice_select;
voices = espeak_ListVoices(NULL);

const espeak_VOICE *v;
for(int ix=0; (v = voices[ix]) != NULL; ix++)
{
printf("Shortsign: %s\n", v->languages);
printf("age: %d\n", v->age);
printf("gender: %c\n", genders[v->gender]);
printf("name: %s\n", v->name);
printf("\n\n");
} // End for
} // End function getvoicename


int main()
{
printf("Hello World!\n");
const char* szVersionInfo = espeak_Info(NULL);

printf("Espeak version: %s\n", szVersionInfo);
samplerate = espeak_Initialize(AUDIO_OUTPUT_PLAYBACK,0,data_path,0);

strcpy(voicename, "default");
// espeak --voices
strcpy(voicename, "german");
strcpy(voicename, GetLanguageVoiceName("DE"));

if(espeak_SetVoiceByName(voicename) != EE_OK)
{
printf("Espeak setvoice error...\n");
}

static char word[200] = "Hello World" ;
strcpy(word, "TV-fäns aufgepasst, es ist 20 Uhr 15. Zeit für Rambo 3");
strcpy(word, "Unnamed Player wurde zum Opfer von GSG9");
int speed = 220;
int volume = 500; // volume in range 0-100 0=silence
int pitch = 50; // base pitch, range 0-100. 50=normal

// espeak.cpp 625
espeak_SetParameter(espeakRATE, speed, 0);
espeak_SetParameter(espeakVOLUME,volume,0);
espeak_SetParameter(espeakPITCH,pitch,0);
// espeakRANGE: pitch range, range 0-100. 0-monotone, 50=normal
// espeakPUNCTUATION: which punctuation characters to announce:
// value in espeak_PUNCT_TYPE (none, all, some),
espeak_VOICE *voice_spec = espeak_GetCurrentVoice();
voice_spec->gender=2; // 0=none 1=male, 2=female,
//voice_spec->age = age;

espeak_SetVoiceByProperties(voice_spec);


espeak_Synth( (char*) word, strlen(word)+1, 0, POS_CHARACTER, 0, espeakCHARS_AUTO, NULL, NULL);
espeak_Synchronize();

strcpy(voicename, GetLanguageVoiceName("EN"));
espeak_SetVoiceByName(voicename);
strcpy(word, "Geany was fragged by GSG9 Googlebot");
strcpy(word, "Googlebot");

espeak_Synth( (char*) word, strlen(word)+1, 0, POS_CHARACTER, 0, espeakCHARS_AUTO, NULL, NULL);
espeak_Synchronize();


espeak_Terminate();
printf("Espeak terminated\n");
return EXIT_SUCCESS;
}

/*
if(espeak_SetVoiceByName(voicename) != EE_OK)
{
memset(&voice_select,0,sizeof(voice_select));
voice_select.languages = voicename;
if(espeak_SetVoiceByProperties(&voice_select) != EE_OK)
{
fprintf(stderr,"%svoice '%s'\n",err_load,voicename);
exit(2);
}
}
*/

以上代码适用于 Linux。下面的代码是关于我在 Vista x64(32 位 emu)上得到的:

#ifdef __cplusplus
#include <cstdio>
#include <cstdlib>
#include <cstring>
#else
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#endif

#include <assert.h>
#include <ctype.h>

#include "speak_lib.h"
//#include "espeak/speak_lib.h"

// libespeak-dev: /usr/include/espeak/speak_lib.h
// apt-get install libespeak-dev
// apt-get install libportaudio-dev

// g++ -o mine mine.cpp -lespeak
// g++ -o mine mine.cpp -I/usr/include/espeak/ -lespeak
// gcc -o mine mine.cpp -I/usr/include/espeak/ -lespeak


char voicename[40];
int iSampleRate;
int quiet = 0;
static char genders[4] = {' ','M','F',' '};

//const char *data_path = "/usr/share/"; // /usr/share/espeak-data/
//const char *data_path = NULL; // use default path for espeak-data
const char *data_path = "C:\\Users\\Username\\Desktop\\espeak-1.43-source\\espeak-1.43-source\\";


int strrcmp(const char *s, const char *sub)
{
int slen = strlen(s);
int sublen = strlen(sub);
return memcmp(s + slen - sublen, sub, sublen);
}


char * strrcpy(char *dest, const char *source)
{
// Pre assertions
assert(dest != NULL);
assert(source != NULL);
assert(dest != source);

// tk: parentheses
while((*dest++ = *source++))
;
return(--dest);
}

const char* GetLanguageVoiceName(const char* pszShortSign)
{
#define LANGUAGE_LENGTH 30
static char szReturnValue[LANGUAGE_LENGTH] ;
memset(szReturnValue, 0, LANGUAGE_LENGTH);

for (int i = 0; pszShortSign[i] != '\0'; ++i)
szReturnValue[i] = (char) tolower(pszShortSign[i]);

const espeak_VOICE **voices;
espeak_VOICE voice_select;
voices = espeak_ListVoices(NULL);

const espeak_VOICE *v;
for(int ix=0; (v = voices[ix]) != NULL; ix++)
{
if( !strrcmp( v->languages, szReturnValue) )
{
strcpy(szReturnValue, v->name);
return szReturnValue;
}
} // End for

strcpy(szReturnValue, "default");
return szReturnValue;
} // End function getvoicename


void ListVoices()
{
const espeak_VOICE **voices;
espeak_VOICE voice_select;
voices = espeak_ListVoices(NULL);

const espeak_VOICE *v;
for(int ix=0; (v = voices[ix]) != NULL; ix++)
{
printf("Shortsign: %s\n", v->languages);
printf("age: %d\n", v->age);
printf("gender: %c\n", genders[v->gender]);
printf("name: %s\n", v->name);
printf("\n\n");
} // End for
} // End function getvoicename


/* Callback from espeak. Directly speaks using AudioTrack. */
#define LOGI(x) printf("%s\n", x)
static int AndroidEspeakDirectSpeechCallback(short *wav, int numsamples, espeak_EVENT *events)
{
char buf[100];
sprintf(buf, "AndroidEspeakDirectSpeechCallback: %d samples", numsamples);
LOGI(buf);

if (wav == NULL)
{
LOGI("Null: speech has completed");
}

if (numsamples > 0)
{
//audout->write(wav, sizeof(short) * numsamples);
sprintf(buf, "AudioTrack wrote: %d bytes", sizeof(short) * numsamples);
LOGI(buf);
}

return 0; // continue synthesis (1 is to abort)
}


static int AndroidEspeakSynthToFileCallback(short *wav, int numsamples,espeak_EVENT *events)
{
char buf[100];
sprintf(buf, "AndroidEspeakSynthToFileCallback: %d samples", numsamples);
LOGI(buf);

if (wav == NULL)
{
LOGI("Null: speech has completed");
}

// The user data should contain the file pointer of the file to write to
//void* user_data = events->user_data;
FILE* user_data = fopen ( "myfile1.wav" , "ab" );

FILE* fp = static_cast<FILE *>(user_data);

// Write all of the samples
fwrite(wav, sizeof(short), numsamples, fp);
return 0; // continue synthesis (1 is to abort)
}



int main()
{
printf("Hello World!\n");
const char* szVersionInfo = espeak_Info(NULL);

printf("Espeak version: %s\n", szVersionInfo);

iSampleRate = espeak_Initialize(AUDIO_OUTPUT_SYNCHRONOUS, 4096, data_path, 0);
if (iSampleRate <= 0)
{
printf("Unable to initialize espeak");
return EXIT_FAILURE;
}

//samplerate = espeak_Initialize(AUDIO_OUTPUT_PLAYBACK,0,data_path,0);

//ListVoices();

strcpy(voicename, "default");
// espeak --voices
//strcpy(voicename, "german");
//strcpy(voicename, GetLanguageVoiceName("DE"));

if(espeak_SetVoiceByName(voicename) != EE_OK)
{
printf("Espeak setvoice error...\n");
}

static char word[200] = "Hello World" ;
strcpy(word, "TV-fäns aufgepasst, es ist 20 Uhr 15. Zeit für Rambo 3");
strcpy(word, "Unnamed Player wurde zum Opfer von GSG9");
int speed = 220;
int volume = 500; // volume in range 0-100 0=silence
int pitch = 50; // base pitch, range 0-100. 50=normal


// espeak.cpp 625
espeak_SetParameter(espeakRATE, speed, 0);
espeak_SetParameter(espeakVOLUME,volume,0);
espeak_SetParameter(espeakPITCH,pitch,0);
// espeakRANGE: pitch range, range 0-100. 0-monotone, 50=normal
// espeakPUNCTUATION: which punctuation characters to announce:
// value in espeak_PUNCT_TYPE (none, all, some),
//espeak_VOICE *voice_spec = espeak_GetCurrentVoice();
//voice_spec->gender=2; // 0=none 1=male, 2=female,
//voice_spec->age = age;

//espeak_SetVoiceByProperties(voice_spec);

//espeak_SetSynthCallback(AndroidEspeakDirectSpeechCallback);
espeak_SetSynthCallback(AndroidEspeakSynthToFileCallback);

unsigned int unique_identifier;
espeak_ERROR err = espeak_Synth( (char*) word, strlen(word)+1, 0, POS_CHARACTER, 0, espeakCHARS_AUTO, &unique_identifier, NULL);

err = espeak_Synchronize();



/*
strcpy(voicename, GetLanguageVoiceName("EN"));
espeak_SetVoiceByName(voicename);
strcpy(word, "Geany was fragged by GSG9 Googlebot");
strcpy(word, "Googlebot");

espeak_Synth( (char*) word, strlen(word)+1, 0, POS_CHARACTER, 0, espeakCHARS_AUTO, NULL, NULL);
espeak_Synchronize();
*/

// espeak_Cancel();
espeak_Terminate();
printf("Espeak terminated\n");
system("pause");
return EXIT_SUCCESS;
}

最佳答案

您是否尝试过将您在回调中获得的缓冲区传递给sndplaysnd()??

Declare Function sndPlaySound Lib "winmm.dll" Alias "sndPlaySoundA" (ByVal lpszSoundName As String, ByVal uFlags As Long) As Long

其标准的winAPI如下:

  sndPlaySound(buffer[0], SND_ASYNC | SND_MEMORY)

或者,如果您有一个包含要播放的音频的 wav 文件:

  sndPlaySound(filename, SND_ASYNC)

playsound 具有异步模式,在播放音频时不会阻止您的程序执行。

注意:我在 VB 中使用过它,上面的代码片段是在 VB 中使用的。如果您使用 VC++ 进行编码,则可能必须相应地修改它们。但基本意图保持不变;将缓冲区传递给设置了 ASYNcflags的 sndPlaySound。

祝你好运!

关于c++ - Espeak SAPI/dll 在 Windows 上的使用?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/2661129/

24 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com