gpt4 book ai didi

c++ - 使用A4音调扩展.WAV文件

转载 作者:行者123 更新时间:2023-12-03 01:41:27 38 4
gpt4 key购买 nike

我的目的是了解如何在延长.WAV文件的同时,加入A4音调和2000Hz正弦波。举个例子,我应该能够取一个10秒长的.WAV文件并将其延长到30秒,在第10秒到第30秒之间加入A4音调,并在整个30秒内叠加2000Hz正弦波。我当前的代码如下:

#include <math.h>
#include <stdint.h>

#include <cstdlib>
#include <ctime>
#include <fstream>
#include <iostream>
#include <string>

using namespace std;
//Structures for Headers
// Top-level RIFF header: the first 12 bytes of the WAV file.
// main() reads and writes this struct as raw bytes, so its in-memory layout
// must match the on-disk layout exactly (no padding between members).
struct RIFFHeader
{
    char chunkId[4];    // 4-byte chunk tag (expected to be "RIFF" in a WAV file)
    uint32_t chunkSize; // size in bytes of everything that follows this field
    char format[4];     // 4-byte format tag (expected to be "WAVE" in a WAV file)
};
static_assert(sizeof(RIFFHeader) == 12, "RIFFHeader must be exactly 12 bytes (no padding)");

// "fmt " sub-chunk: 8-byte chunk header followed by the 16-byte PCM format
// description. It is read and written as raw bytes, so the layout must match
// the file format exactly (no padding between members).
struct FormatSubChunk
{
    char chunkId[4];        // 4-byte chunk tag (expected to be "fmt ")
    uint32_t chunkSize;     // size in bytes of the format body that follows this field
    uint16_t audioFormat;   // encoding tag (1 means uncompressed PCM in the WAV spec)
    uint16_t channels;      // number of interleaved channels
    uint32_t frequency;     // sample rate in frames per second
    uint32_t byteRate;      // bytes of audio per second
    uint16_t blockAlign;    // bytes per frame (all channels of one sample instant)
    uint16_t bitsPerSample; // bits per single-channel sample
};
static_assert(sizeof(FormatSubChunk) == 24, "FormatSubChunk must be exactly 24 bytes (no padding)");

// Generic 8-byte chunk header; used here for the "data" sub-chunk that
// precedes the raw sample bytes. Read and written as raw bytes, so it must
// contain no padding.
struct DataSubChunkHeader
{
    char chunkId[4];    // 4-byte chunk tag (expected to be "data" for the audio payload)
    uint32_t chunkSize; // number of payload bytes that follow this header
};
static_assert(sizeof(DataSubChunkHeader) == 8, "DataSubChunkHeader must be exactly 8 bytes (no padding)");

// One stereo frame: a left and a right 16-bit sample stored back to back.
// NOTE(review): the fields are unsigned, whereas main() reads samples into
// int16_t variables; confirm the intended signedness before using this
// struct for arithmetic on sample values.
struct Sample
{
    uint16_t leftchannel;  // left-channel sample of the frame
    uint16_t rightchannel; // right-channel sample of the frame
};
static_assert(sizeof(Sample) == 4, "Sample must be exactly 4 bytes (one 16-bit stereo frame)");
//------------------------------------------------



int main()
{

clock_t start;
double duration;
start = clock();

//Declaring input and output files
string infile = "Frederick_N_orig.wav";
string outfile = "Frederick_N_mod.wav";
ifstream in(infile.c_str(), ios::in | ios::binary);
ofstream out(outfile.c_str());

//Reading Headers
RIFFHeader RIFF;
in.read((char*)&RIFF,sizeof(RIFF));


FormatSubChunk Format;
in.read((char*)&Format,sizeof(Format));



DataSubChunkHeader Data;
in.read((char*)&Data,sizeof(Data));



RIFF.chunkSize = (RIFF.chunkSize - 46)/2 + 46;
Format.frequency = Format.frequency/2;
Format.byteRate = Format.byteRate/2;
Data.chunkSize = (Data.chunkSize-46)/2;


uint16_t clear = 0;

out.write((char*)&RIFF, sizeof(RIFF));
out.write((char*)&Format, sizeof(Format));
out.write((char*)&Data, sizeof(Data));
//---------------------------------------------

//Printing out Header information for troubleshooting
cout << "Chunk ID: " << RIFF.chunkId[0] << RIFF.chunkId[1] << RIFF.chunkId[2] << RIFF.chunkId[3] << endl;
cout << "Chunk Size: " << RIFF.chunkSize << endl;
cout << "Format: " << RIFF.format[0] << RIFF.format[1] << RIFF.format[2] << RIFF.format[3] <<endl;
cout << "Sub-chunk1 ID: " << Format.chunkId[0] << Format.chunkId[1] << Format.chunkId[2] << Format.chunkId[3] <<endl;
cout << "Sub-chunk1 Size: " << Format.chunkSize << endl;
cout << "Audio Format: " << Format.audioFormat << endl;
cout << "Number of Channels: " << Format.channels << endl;
cout << "Sample Rate: " << Format.frequency << endl;
cout << "Byte Rate: " << Format.byteRate << endl;
cout << "Block Align: " << Format.blockAlign << endl;
cout << "Bits Per Sample: " << Format.bitsPerSample << endl;
cout << "Sub-chunk2 ID: " << Data.chunkId[0] << Data.chunkId[1] << Data.chunkId[2] << Data.chunkId[3] << endl;
cout << "Sub-chunk2 Size: " << Data.chunkSize << endl << endl;
//------------------------------------------------------------------------------------------------------------------------
double p = 440;
float pie = 3.1415;


int16_t leftSample1;
int16_t leftSample2;
int16_t rightSample1;
int16_t rightSample2;
int32_t count = 0;

int n = 0;

//Reading in Left and Right Channels and performing processing
while(!in.eof())
{

in.read((char*)&leftSample1, 2);
if(in.eof())
break;

in.read((char*)&rightSample1, 2);
if(in.eof())
break;

in.read((char*)&leftSample2, 2);
if(in.eof())
break;

in.read((char*)&rightSample2, 2);
if(in.eof())
break;

const double max_amplitude = 32760; // "volume"

double hz = 22050; // samples per second
double frequency = 440; // middle C
double seconds = 21; // time

int N = hz * seconds;
double amplitude = (double)n/N * max_amplitude;
double value = sin(2*pie*p)*.25;

int32_t leftAvg = ((int32_t)leftSample1 + (int32_t)leftSample2)/2;
int32_t rightAvg = ((int32_t)rightSample1 + (int32_t)rightSample2)/2;
leftAvg = leftAvg + leftAvg*value;
rightAvg = rightAvg + rightAvg*value;

n++;


int16_t outLeft;
int16_t outRight;

if(leftAvg > 32767)
outLeft = 32767;
else if(leftAvg < -32768)
outLeft = -32768;
else
outLeft = (int16_t)leftAvg;

if(rightAvg > 32767)
outRight = 32767;
else if(rightAvg < -32768)
outRight = -32768;
else
outRight = (int16_t)rightAvg;

out.write((char*)&outLeft, sizeof(outLeft));
out.write((char*)&outRight, sizeof(outRight));
count++;

}
//--------------------------------------------------------------------------------------
//cleaing up
in.close();
out.close();
//-------------

//Reading output file and comparing to original file
string infile1 = "Frederick_N_mod.wav";
ifstream in1(infile1.c_str(), ios::in | ios::binary);

RIFFHeader riff1;
in1.read((char*)&riff1,sizeof(riff1));


FormatSubChunk format1;
in1.read((char*)&format1,sizeof(format1));


//in.ignore(2);

DataSubChunkHeader data1;
in1.read((char*)&data1,sizeof(data1));
in.close();

cout << "Chunk ID: " << riff1.chunkId[0] << riff1.chunkId[1] << riff1.chunkId[2] << riff1.chunkId[3] << endl;
cout << "Chunk Size: " << riff1.chunkSize << endl;
cout << "Format: " << riff1.format[0] << riff1.format[1] << riff1.format[2] << riff1.format[3] <<endl;
cout << "Sub-chunk1 ID: " << format1.chunkId[0] << format1.chunkId[1] << format1.chunkId[2] << format1.chunkId[3] <<endl;
cout << "Sub-chunk1 Size: " << format1.chunkSize << endl;
cout << "Audio Format: " << format1.audioFormat << endl;
cout << "Number of Channels: " << format1.channels << endl;
cout << "Sample Rate: " << format1.frequency << endl;
cout << "Byte Rate: " << format1.byteRate << endl;
cout << "Block Align: " << format1.blockAlign << endl;
cout << "Bits Per Sample: " << format1.bitsPerSample << endl;
cout << "Sub-chunk2 ID: " << data1.chunkId[0] << data1.chunkId[1] << data1.chunkId[2] << data1.chunkId[3] << endl;
cout << "Sub-chunk2 Size: " << data1.chunkSize << endl << endl;
//---------------------------------------------------------------------------------------------------------------------------------

//Computing execution time and writing summary file
duration = ( std::clock() - start ) / (double) CLOCKS_PER_SEC;

string summaryoutfile = "summary.txt";
ofstream summaryoutput(summaryoutfile.c_str());
summaryoutput << "sampling frequency = " << Format.frequency << '\n';
long recordTime = count/Format.frequency;
summaryoutput << "record time = " << recordTime << " seconds" << '\n';
summaryoutput << "execution time = " << duration << " seconds" << '\n';
//----------------------------------------------------------------------------------------------

summaryoutput.close();

return 0;
}

因此,问题是,如何使用A4音调扩展.WAV文件并添加2000hz正弦波?当我尝试上面的代码时,出现乱码。任何帮助,将不胜感激!提前致谢!!

最佳答案

这个问题有很多部分,所以我建议将问题分解为几个部分,并验证每个部分是否有效。我还建议您使用Audacity之类的音频编辑工具,以便检查结果并验证每个步骤是否正常。我将其分为以下几个部分:

  • 代码是否正确输入了wav文件并以相同方式输出
    如果您不进行任何编辑?
  • 然后,能否在不添加任何内容的情况下,将wav文件延长到指定的持续时间?
  • 可以按特定的频率和采样率生成正弦音吗?
  • 您可以将输入波形数据和任何正弦音调混合在一起吗
    产生的?

  • 此操作的非常粗糙的伪代码:

    使用输入波形文件的数据块(data chunk)大小 + 扩展部分的大小,计算输出波形文件的数据块大小

    保持值leftValue和rightValue变量,它们是要写出的计算出的输出值

    对于输出wave文件从开头到结尾的每个样本:
  • 将当前的leftValue和rightValues设置为0
  • 如果仍然有剩余的输入文件数据,请左右读取
    将样本值输入到leftValue和rightValue中
  • 如果要生成任何正弦音,请计算其值
    在给定位置
  • 将当前位置生成的正弦数据添加到leftValue和rightValue
  • 如果将多个数据源加在一起,则可能需要
    乘以某个增益量,这样音频就不会裁剪
  • 写入leftValue和rightValue以输出wav文件

  • 查看您当前的代码,我注意到一些问题:

    输出波形文件的数据块大小需要用输入波形文件的数据块大小 + 扩展音频的大小来计算。此外,不要假设输入wav文件中数据块之前的RIFF header 是固定长度。

    我不确定为什么要一次读取两个输入样本并取平均值。这本身会使任何音频数据乱码。 (编辑:我看到您现在使用这种逻辑对音频进行下采样)

    您正在计算的 sin(2*pie*p)*.25 是一个固定值,永远不会随时间变化。您需要根据当前采样率,计算正弦波在每个采样位置上的值。另外,您目前是把正弦运算的值与样本数据相乘;而您想要的很可能是把正弦数据混入音频,这应该是一个加法运算。

    用诸如
    if(leftAvg > 32767)
    outLeft = 32767;
    else if(leftAvg < -32768)
    outLeft = -32768;
    else
    outLeft = (int16_t)leftAvg;

    这样的代码进行限幅,在电平超过限制时只会引入削波并产生不良结果。我还建议把所有样本都换算为-1.0到1.0范围内的浮点值再进行计算。与short值相比,这种格式在编辑音频时更容易处理。

    编辑:添加了一个示例方法,该方法可读取wav文件并用指定长度的正弦音填充。删除了下采样代码,因此逻辑更简单。
    // Copies the 16-bit PCM WAV file at sourcePath to destinationPath and
    // appends paddingSeconds of a sine tone at sineFrequency Hz (half of full
    // scale) to every channel. The output headers are rewritten to describe
    // the new length. A negative paddingSeconds shortens the audio instead;
    // the result is never shorter than zero frames. If the source holds fewer
    // samples than its header claims, the missing samples are written as
    // silence. Problems are reported on stderr and the function returns
    // without writing the destination file.
    void padWaveFile(string sourcePath, int paddingSeconds, float sineFrequency, string destinationPath)
    {
        std::ifstream in(sourcePath, std::ios::binary);
        if (!in)
        {
            std::cerr << "padWaveFile: cannot open " << sourcePath << '\n';
            return;
        }

        RIFFHeader RIFF;
        in.read(reinterpret_cast<char*>(&RIFF), sizeof(RIFF));

        FormatSubChunk Format;
        in.read(reinterpret_cast<char*>(&Format), sizeof(Format));

        // FormatSubChunk holds an 8-byte chunk header plus this many body bytes.
        const uint32_t fmtBodySize = static_cast<uint32_t>(sizeof(Format) - 8);
        if (!in || Format.chunkSize < fmtBodySize)
        {
            std::cerr << "padWaveFile: not a valid WAV file: " << sourcePath << '\n';
            return;
        }

        // The fmt chunk can be longer than 16 bytes; skip the extra bytes (and
        // the pad byte of an odd-sized chunk) so the next header is aligned.
        const std::streamsize fmtExtra = static_cast<std::streamsize>(Format.chunkSize) -
                                         static_cast<std::streamsize>(fmtBodySize) +
                                         static_cast<std::streamsize>(Format.chunkSize & 1u);
        if (fmtExtra > 0)
            in.ignore(fmtExtra);

        // Samples are copied and generated as 16-bit values.
        if (Format.bitsPerSample != 16 || Format.channels == 0 || Format.frequency == 0)
        {
            std::cerr << "padWaveFile: unsupported format (need 16-bit PCM): " << sourcePath << '\n';
            return;
        }

        // Skip any chunks that precede the audio payload.
        DataSubChunkHeader Data;
        for (;;)
        {
            in.read(reinterpret_cast<char*>(&Data), sizeof(Data));
            if (!in)
            {
                std::cerr << "padWaveFile: no data chunk in " << sourcePath << '\n';
                return;
            }
            if (Data.chunkId[0] == 'd' && Data.chunkId[1] == 'a' &&
                Data.chunkId[2] == 't' && Data.chunkId[3] == 'a')
                break;
            in.ignore(static_cast<std::streamsize>(Data.chunkSize) +
                      static_cast<std::streamsize>(Data.chunkSize & 1u));
        }

        // Frame counts use 64-bit arithmetic so long paddings cannot overflow.
        const int64_t bytesPerFrame = static_cast<int64_t>(Format.channels) * 2;
        const int64_t sampleRate = static_cast<int64_t>(Format.frequency);
        const int64_t sourceSampleCount = static_cast<int64_t>(Data.chunkSize) / bytesPerFrame;
        int64_t destinationSampleCount = sourceSampleCount + static_cast<int64_t>(paddingSeconds) * sampleRate;
        if (destinationSampleCount < 0)
            destinationSampleCount = 0;

        // Bytes that follow RIFF.chunkSize in the output, excluding the samples.
        const int64_t headerTail = static_cast<int64_t>(sizeof(RIFF.format) + sizeof(Format) + sizeof(Data));
        const int64_t maxChunk = 0xFFFFFFFFLL; // chunk sizes are 32-bit fields
        if (destinationSampleCount > (maxChunk - headerTail) / bytesPerFrame)
        {
            std::cerr << "padWaveFile: padded file would exceed the WAV size limit\n";
            return;
        }

        std::ofstream out(destinationPath, std::ios::binary);
        if (!out)
        {
            std::cerr << "padWaveFile: cannot open " << destinationPath << '\n';
            return;
        }

        // Sizes are in bytes: frames * channels * (bitsPerSample / 8).
        const uint32_t destinationChunkSize = static_cast<uint32_t>(destinationSampleCount * bytesPerFrame);
        Format.chunkSize = fmtBodySize; // only the 16-byte PCM body is written out
        Data.chunkSize = destinationChunkSize;
        RIFF.chunkSize = static_cast<uint32_t>(headerTail) + destinationChunkSize;

        out.write(reinterpret_cast<const char*>(&RIFF), sizeof(RIFF));
        out.write(reinterpret_cast<const char*>(&Format), sizeof(Format));
        out.write(reinterpret_cast<const char*>(&Data), sizeof(Data));

        const double twoPi = 2.0 * 3.14159265358979323846;
        const double sineStep = (twoPi * static_cast<double>(sineFrequency)) / static_cast<double>(sampleRate);
        const double sineGain = 0.5; // attenuate so the tone is not at full volume
        const double maxShort = 32767.0;
        double sinePosition = 0.0;   // phase of the tone, kept within one period

        for (int64_t i = 0; i < destinationSampleCount; ++i)
        {
            // Decide by frame index rather than eof(): eof() only becomes true
            // after a read has already failed, and the source may contain
            // further chunks after the audio data.
            const bool fromSource = i < sourceSampleCount;

            int16_t sineValue = 0;
            if (!fromSource)
            {
                sineValue = static_cast<int16_t>(sin(sinePosition) * sineGain * maxShort);
                sinePosition = fmod(sinePosition + sineStep, twoPi);
            }

            for (uint16_t channel = 0; channel < Format.channels; ++channel)
            {
                int16_t sample = sineValue;
                if (fromSource)
                {
                    sample = 0;
                    in.read(reinterpret_cast<char*>(&sample), sizeof(sample));
                    if (!in)
                        sample = 0; // truncated source: emit silence
                }
                out.write(reinterpret_cast<const char*>(&sample), sizeof(sample));
            }
        }

        if (!out)
            std::cerr << "padWaveFile: error while writing " << destinationPath << '\n';
    }

    关于c++ - 使用A4音调扩展.WAV文件,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/47114340/

    38 4 0
    Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
    广告合作:1813099741@qq.com 6ren.com