ffmpeg - 当输入 pcm 样本计数不等于 1024 时，如何使用 ffmpeg-API 将重新采样的 PCM 音频编码为 AAC-6ren

ffmpeg - 当输入 pcm 样本计数不等于 1024 时，如何使用 ffmpeg-API 将重新采样的 PCM 音频编码为 AAC

转载作者：行者123 更新时间：2023-12-02 08:49:12

我正在致力于捕获音频并将其流式传输到 RTMP 服务器。我在 MacOS 下工作(在 Xcode 中)，因此为了捕获音频样本缓冲区，我使用 AVFoundation 框架。但对于编码和流媒体，我需要使用 ffmpeg-API 和 libfaac 编码器。因此输出格式必须是 AAC(以支持 iOS 设备上的流播放)。

我遇到了这样的问题:音频捕获设备(在我的例子中是罗技相机)为我提供了 512 个 LPCM 样本的样本缓冲区，我可以从 16000、24000、36000 或 48000 Hz 中选择输入采样率。当我将这 512 个样本提供给 AAC 编码器(配置为适当的采样率)时，我听到的音频缓慢且断断续续(似乎每一帧之后都有一段静音)。

我发现(也许我错了)，libfaac 编码器仅接受 1024 个样本的音频帧。当我在编码之前将输入采样率设置为 24000 并将输入样本缓冲区重新采样为 48000 时，我获得了 1024 个重新采样的样本。将这 1024 个样本编码为 AAC 后，我在输出中听到了正确的声音。但是，无论选择哪种输入采样率，我的网络摄像头每个缓冲区都只提供 512 个样本，而输出采样率必须为 48000 Hz。所以无论如何我都需要进行重采样，而重采样后缓冲区中得到的样本数不会恰好是 1024 个。

有没有办法在 ffmpeg-API 功能中解决这个问题？

如果有任何帮助，我将不胜感激。

PS:我想我可以累积重新采样的缓冲区，直到样本数达到 1024，然后再对其进行编码；但这是实时流，这样做会给生成的时间戳以及其他输入设备带来问题，因此这种解决方案并不合适。

当前问题源于[问题]中描述的问题:How to fill audio AVFrame (ffmpeg) with the data obtained from CMSampleBufferRef (AVFoundation)?

这是带有音频编解码器配置的代码(还有视频流，但视频工作正常):

    /* Global state shared by Init() and the per-buffer audio capture code. */
    /* Audio frame: allocated in Init() and refilled for every captured buffer. */
    static AVFrame *aframe;
    /* Not referenced anywhere in the visible code; presumably the video frame. */
    static AVFrame *frame;
    AVOutputFormat *fmt; /* alias of oc->oformat, assigned in Init() */
    AVFormatContext *oc; /* output context created in Init() with the "flv" format name */
    /* Output streams; both are reset to NULL in Init() before the codec setup. */
    AVStream *audio_st, *video_st;
/*
 * Init - create the FLV output context and set up the audio encoder.
 *
 * Registers the codecs/formats, allocates the global output context `oc`,
 * forces H.264 video and AAC audio on its output format, creates the audio
 * stream `audio_st`, configures its codec context (S16, 32 kbit/s, 48 kHz,
 * AAC-LC, mono), allocates the global `aframe` and opens the audio encoder.
 * Every failure path prints a message to stderr and calls exit(1).
 *
 * NOTE(review): this is an excerpt. The video setup is elided ("//…"),
 * `filename` is defined outside the visible code, no return type is
 * declared, and the braces are not balanced (the function body is never
 * closed in the visible lines).
 */
Init ()
{
    AVCodec *audio_codec, *video_codec;
    int ret;

    avcodec_register_all();  /* global libavcodec / libavformat / network initialisation */
    av_register_all();
    avformat_network_init();
    /* Output context for the "flv" format; the return value is not checked here. */
    avformat_alloc_output_context2(&oc, NULL, "flv", filename);
    fmt = oc->oformat;
    /* Override the codecs stored in the output format: H.264 video, AAC audio. */
    oc->oformat->video_codec = AV_CODEC_ID_H264;
    oc->oformat->audio_codec = AV_CODEC_ID_AAC;
    video_st = NULL;
    audio_st = NULL;
    /* NOTE(review): the closing brace on the next line sits inside the `//`
       comment, so as written it does not close the block. */
    if (fmt->video_codec != AV_CODEC_ID_NONE) 
      { //…  /*init video codec*/}
    if (fmt->audio_codec != AV_CODEC_ID_NONE) {
    /* Look up the encoder for the codec id selected above. */
    audio_codec= avcodec_find_encoder(fmt->audio_codec);

    if (!(audio_codec)) {
        fprintf(stderr, "Could not find encoder for '%s'\n",
                avcodec_get_name(fmt->audio_codec));
        exit(1);
    }
    /* Add the audio stream to the output context. */
    audio_st= avformat_new_stream(oc, audio_codec);
    if (!audio_st) {
        fprintf(stderr, "Could not allocate stream\n");
        exit(1);
    }
    /* The stream id is the index of the stream that was just added. */
    audio_st->id = oc->nb_streams-1;

    //AAC:
    /* Encoder parameters: signed 16-bit samples, 32 kbit/s, 48 kHz, AAC-LC, mono. */
    audio_st->codec->sample_fmt  = AV_SAMPLE_FMT_S16;
    audio_st->codec->bit_rate    = 32000;
    audio_st->codec->sample_rate = 48000;
    audio_st->codec->profile=FF_PROFILE_AAC_LOW;
    /* Stream time base is 1/sample_rate, i.e. one tick per output sample. */
    audio_st->time_base = (AVRational){1, audio_st->codec->sample_rate };
    audio_st->codec->channels    = 1;
    audio_st->codec->channel_layout = AV_CH_LAYOUT_MONO;      


    /* Request a global header from the encoder when the output format asks for one. */
    if (oc->oformat->flags & AVFMT_GLOBALHEADER)
        audio_st->codec->flags |= CODEC_FLAG_GLOBAL_HEADER;
    }

    if (video_st)
    {
    //   …
    /*prepare video*/
    }
    if (audio_st)
    {
    /* Allocate the audio frame that is reused for every encoded buffer. */
    aframe = avcodec_alloc_frame();
    if (!aframe) {
        fprintf(stderr, "Could not allocate audio frame\n");
        exit(1);
    }
    AVCodecContext *c;
    /* NOTE(review): this `ret` shadows the one declared at the top of Init(). */
    int ret;

    c = audio_st->codec;


    /* Open the encoder with no extra options. */
    ret = avcodec_open2(c, audio_codec, 0);
    if (ret < 0) {
        fprintf(stderr, "Could not open audio codec: %s\n", av_err2str(ret));
        exit(1);
    }

    //…
}

重新采样和编码音频:

/*
 * Audio branch of the capture callback: takes one captured sample buffer,
 * resamples it to the encoder's sample rate/format with libswresample,
 * wraps the result in the global `aframe`, encodes it and, if the encoder
 * produced a packet, writes that packet to the output context `oc`.
 *
 * NOTE(review): this is a fragment. `mType` and `sampleBuffer` come from the
 * enclosing code that is not shown, and the outer brace opened below is not
 * closed in the visible lines.
 */
if (mType == kCMMediaType_Audio)
{
    /* Timing of the first sample (index 0) in the captured buffer. */
    CMSampleTimingInfo timing_info;
    CMSampleBufferGetSampleTimingInfo(sampleBuffer, 0, &timing_info);
    double  pts=0;
    /* `dts` is declared but never used in the visible code. */
    double  dts=0;
    AVCodecContext *c;
    AVPacket pkt = { 0 }; // data and size must be 0;
    int got_packet, ret;
     av_init_packet(&pkt);
    c = audio_st->codec;
      CMItemCount numSamples = CMSampleBufferGetNumSamples(sampleBuffer);

    /* Only channel 0 is read, so the byte offset computed below is 0. */
    NSUInteger channelIndex = 0;

    /* Get a raw pointer to the 16-bit samples inside the block buffer. */
    CMBlockBufferRef audioBlockBuffer = CMSampleBufferGetDataBuffer(sampleBuffer);
    size_t audioBlockBufferOffset = (channelIndex * numSamples * sizeof(SInt16));
    size_t lengthAtOffset = 0;
    size_t totalLength = 0;
    SInt16 *samples = NULL;
    CMBlockBufferGetDataPointer(audioBlockBuffer, audioBlockBufferOffset, &lengthAtOffset, &totalLength, (char **)(&samples));

    /* Stream description of the captured audio (sample rate, channel count). */
    const AudioStreamBasicDescription *audioDescription = CMAudioFormatDescriptionGetStreamBasicDescription(CMSampleBufferGetFormatDescription(sampleBuffer));

    /* A new resampler context is allocated for every buffer and freed after encoding. */
    SwrContext *swr = swr_alloc();

    int in_smprt = (int)audioDescription->mSampleRate;
    /* The input layout is fixed to mono, while the input channel count below
       is taken from the capture description. */
    av_opt_set_int(swr, "in_channel_layout",  AV_CH_LAYOUT_MONO, 0);

    av_opt_set_int(swr, "out_channel_layout", audio_st->codec->channel_layout,  0);

    av_opt_set_int(swr, "in_channel_count", audioDescription->mChannelsPerFrame,  0);
    av_opt_set_int(swr, "out_channel_count", audio_st->codec->channels,  0);

    /* "out_channel_layout" is set a second time here with the same value. */
    av_opt_set_int(swr, "out_channel_layout", audio_st->codec->channel_layout,  0);
    av_opt_set_int(swr, "in_sample_rate",     audioDescription->mSampleRate,0);

    av_opt_set_int(swr, "out_sample_rate",    audio_st->codec->sample_rate,0);

    av_opt_set_sample_fmt(swr, "in_sample_fmt",  AV_SAMPLE_FMT_S16, 0);

    av_opt_set_sample_fmt(swr, "out_sample_fmt", audio_st->codec->sample_fmt,  0);

    /* The return value of swr_init() is not checked. */
    swr_init(swr);
    uint8_t **input = NULL;
    int src_linesize;
    int in_samples = (int)numSamples;
    /* Allocates the pointer array and a sample buffer for the input. The
       format passed here is AV_SAMPLE_FMT_S16P, whereas the resampler input
       format above was set to AV_SAMPLE_FMT_S16. */
    ret = av_samples_alloc_array_and_samples(&input, &src_linesize, audioDescription->mChannelsPerFrame,
                                             in_samples, AV_SAMPLE_FMT_S16P, 0);


    /* The first pointer of the array is replaced with the capture buffer.
       NOTE(review): the buffer allocated by the call above appears to be
       orphaned by this assignment, and neither `input` nor `output` is
       released in the visible code — confirm. */
    *input=(uint8_t*)samples;
    uint8_t *output=NULL;


    /* Output capacity: (resampler delay + input samples) rescaled from the
       input rate to the encoder rate, rounded up. */
    int out_samples = av_rescale_rnd(swr_get_delay(swr, in_smprt) +in_samples, (int)audio_st->codec->sample_rate, in_smprt, AV_ROUND_UP);

    av_samples_alloc(&output, NULL, audio_st->codec->channels, out_samples, audio_st->codec->sample_fmt, 0);
    in_samples = (int)numSamples;
    /* `out_samples` is overwritten with the return value of swr_convert(),
       which is used below without a check for a negative (error) value. */
    out_samples = swr_convert(swr, &output, out_samples, (const uint8_t **)input, in_samples);


    /* The frame carries however many samples the resampler produced for
       this buffer; nothing here pads or accumulates to a fixed frame size. */
    aframe->nb_samples =(int) out_samples;


    /* Point the frame's data at the resampled buffer;
       size = samples * bytes per sample * channels. */
    ret = avcodec_fill_audio_frame(aframe, audio_st->codec->channels, audio_st->codec->sample_fmt,
                             (uint8_t *)output,
                             (int) out_samples *
                             av_get_bytes_per_sample(audio_st->codec->sample_fmt) *
                             audio_st->codec->channels, 1);

    aframe->channel_layout = audio_st->codec->channel_layout;
    aframe->channels=audio_st->codec->channels;
    aframe->sample_rate= audio_st->codec->sample_rate;

    /* Capture timestamp in seconds (value / timescale); stays 0 when timescale is 0. */
    if (timing_info.presentationTimeStamp.timescale!=0)
        pts=(double) timing_info.presentationTimeStamp.value/timing_info.presentationTimeStamp.timescale;

    /* Seconds -> stream time-base ticks (multiplied by time_base.den), then
       rescaled to the codec time base expected by the encoder. */
    aframe->pts=pts*audio_st->time_base.den;
    aframe->pts = av_rescale_q(aframe->pts, audio_st->time_base, audio_st->codec->time_base);

    /* Encode the frame; `got_packet` reports whether `pkt` holds output. */
    ret = avcodec_encode_audio2(c, &pkt, aframe, &got_packet);

    if (ret < 0) {
        fprintf(stderr, "Error encoding audio frame: %s\n", av_err2str(ret));
        exit(1);
    }
    swr_free(&swr);
    if (got_packet)
    {
        pkt.stream_index = audio_st->index;

        /* Convert packet timestamps from the codec time base back to the stream time base. */
        pkt.pts = av_rescale_q(pkt.pts, audio_st->codec->time_base, audio_st->time_base);
        pkt.dts = av_rescale_q(pkt.dts, audio_st->codec->time_base, audio_st->time_base);

        // Write the compressed frame to the media file.
       ret = av_interleaved_write_frame(oc, &pkt);
       if (ret != 0) {
            fprintf(stderr, "Error while writing audio frame: %s\n",
                    av_err2str(ret));
            exit(1);
        }

}

最佳答案

在遇到类似问题后我也来到这里。我正在从 Blackmagic Decklink SDI 卡读取 720p50 的音频和视频，这意味着每个视频帧 (48k/50fps) 有 960 个样本，我想与视频一起编码。当只向 aacenc 发送 960 个样本时，得到了非常奇怪的音频，而且编码器对此也没有报出任何错误或警告。

我开始使用 AVAudioFifo(参见 ffmpeg/doc/examples/transcode_aac.c)，并不断向其中添加帧，直到积累的数据足以满足 aacenc 的要求。我猜这意味着部分样本的播放时间会偏晚，因为 pts 是按 1024 个样本一帧来设置的，而其中前 960 个样本本应对应另一个时间戳。不过，就我所听到/看到的而言，这并不明显。

关于ffmpeg - 当输入 pcm 样本计数不等于 1024 时，如何使用 ffmpeg-API 将重新采样的 PCM 音频编码为 AAC，我们在Stack Overflow上找到一个类似的问题： https://stackoverflow.com/questions/16904841/

文章推荐： Pandas 条形图边缘到破折号

文章推荐： opencl - 多个设备之间的原子操作

文章推荐： android - 为什么我不能使用relativeLayout作为parentView？

文章推荐： django - models.py 出错后必须手动重新启动 runserver

javascript - 为什么 ~-1 等于 0 而 ~1 等于 -2？
根据小节 11.4.8 ECMAScript 5.1 标准: The production UnaryExpression : ~ UnaryExpression is evaluated as fo
php - MySQL REPLACE 行，其中字段 a 等于 x，字段 b 等于 y
我正在尝试构建一个“新评论”功能，向用户显示自上次访问以来的新评论数量。我构建了一个“ View ”表，其中包含主题 ID、用户 ID 和时间戳。每次用户访问该主题时更新时间戳或插入新行(如果不存在)
javascript - 为什么 !!1=="1" 等于 true 而 !!2=="2" 等于 false？
如标题所述，为什么: > !!1=="1" 等于 True 和 > !!2=="2" 等于: False 同样，为什么 > "1"==true 等于 true 而 > "2"==true 等于 fal
mysql - 混淆 "p OR q"， "p AND q"，其中 "p"等于 "false"， "q"等于 "unknown"
我在 Stack Overflow post 上看到了下图但是，我对“p OR q”、“p AND q”的结果感到困惑，其中“p”等于“false”，“q”等于“unknown”。在图中，“p O
mysql - 如果使用 MYSQL MARIADB，column1 中的 key1 等于 value1 并且 key2 等于 column2 中的 value2，如何在两列中进行搜索？
一栏有效 whereJsonContains('VehicleApplications' ,['ModelName' => $model, 'YearID' => $year] )->
mysql - 从故事中选择项目，其中 story.uid 等于 uid 或 story.uid 等于 friend.uid 或 friend.fid 并被接受
如果满足条件，我如何才能只获取特定记录？我有代码为 "SELECT a.id, a.text, a.uid, a.time FROM story a INNER JOIN friends b
MongoDB 等于
我正在尝试运行 MongoDB 查询并返回字段为空的记录(更具体地说，在 pyMongo 中为 None)。所以它必须等于 null。我知道这不等于: {"firstName": {"$ne": N
Java - HashCode - 等于
我在 Java 中进行单元测试时遇到问题。我把我的代码和错误放在这里。在互联网上我发现这是哈希码的问题。我需要重新创建它们，但我不知道为什么以及如何。我的方法: public void setGr
javascript - typescript 等于
如何在 Typescript 中实现 equals？我尝试了几种方法，都没有奏效。选项1: abstract class GTreeObject{ abstract equals(obj:
Java 对象的数组列表包含/等于
我查看了很多地方，大多数 arraylist 示例都使用“String”作为元素，但是很难找到使用对象的地方。假设我正在制作一个图书 Collection ，并且我有一个作者对象: class Au
Perl5 =(等于)运算符优先级
$a,$b,$c = 1,2,3; print "$a, $b, $c\n"; 返回 , , 1 那么 = (equals) 是否比元组构造具有更高的优先级 - 这样做？ $a,$b,($c=1
c - a = i++； `a` 等于 `i` 吗？
在此代码片段中，a 和 i 分别具有什么值以及为什么？ int i = 1; int a = i++; 是a == 1还是a == 2？最佳答案 a==1。然后，i==2 如果你这样做的话，那就是a
用于循环查找之前，等于，之间和之后值的Javascript？
我觉得我遗漏了一些明显的东西。这是一个简单的例子来说明我的问题。我希望 current = 3 返回“之前”。 current = 4 应该返回“key-two”，current = 5 应该返回“
Java 对象.等于
有人能告诉我为什么这会返回 true 吗？我想如果我投一些东西给例如Object 然后调用.equals,将使用 Object 的默认实现。 s1 == s2 应该返回 false。请告诉我在哪个主
swift - UIImage 等于
我需要检查加载到 UIImage 对象文件中的文件是否等于另一个图像，如果是，则执行一些操作。不幸的是，它不起作用。 emptyImage = UIImage(named: imageName) if
无法确定是否相等时，Java 等于
我想知道什么是正确的 Java 编程范式来覆盖类 C 对象的 equals(和 hashCode)方法，在以下情况下 (a) 有没有足够的信息来确定 C 的两个实例是否相等，或者 (b) 调用方法不应
python - 为什么 (()) 等于 ()？
>>> (()) == () True >>> (()) () 最佳答案 () 是一个 0 元组。 (foo) 产生 foo 的值。因此，(()) 产生一个 0 元组。来自 the tutorial
javascript - 为什么 i += i + i++ 等于 0？
考虑这段代码: var i = 0; >> undefined i += i + i++; >> 0 i >> 0 // why not 1? 由于增量 (++) 运算符，我希望 i 为 1。我认为
Delphi XE : Where is my TValue.Equals()？
在我看来，TValue 似乎缺少一个强制方法； TValue.Equals(TValue)。那么比较 2 个 TValue 的快速且合适的方法是什么，最好不使用 TValue.ToString()，
sql - 等于 (=) 与 LIKE
使用 SQL 时，在 WHERE 子句中使用 = 代替 LIKE 有什么好处吗？如果没有任何特殊的运算符，LIKE 和 = 是相同的，对吧？最佳答案不同的运算符 LIKE 和 = 是不同的运算符

行者123

个人简介

我是一名优秀的程序员,十分优秀！

作者热门文章

滴滴打车优惠券免费领取

全站热门文章

首页

博学

6Ren·AI

商城

ffmpeg - 当输入 pcm 样本计数不等于 1024 时，如何使用 ffmpeg-API 将重新采样的 PCM 音频编码为 AAC