gpt4 book ai didi

c - FFMPEG:将视频的一部分写入新文件时 AV 不同步

转载 作者:行者123 更新时间:2023-11-30 16:48:10 25 4
gpt4 key购买 nike

我正在使用 FFMPEG 和人脸检测 API 为计算机视觉项目开发数据预处理程序。在这个程序中,我需要从给定的输入视频文件中提取包含人脸的镜头并将其输出到一个新文件中。但是当我播放该程序生成的输出视频文件时,视频和音频轨道不同步。我认为可能的原因是视频帧或音频帧的时间戳设置不正确,但我无法自己修复它,因为我对 FFMPEG 库不太熟悉,请帮我解决这个不同步的问题.

为了简化下面所示的代码,我删除了所有面部检测代码,并使用名为 faceDetect 的空函数来表示它。

// ffmpegAPI.cpp : Defines the entry point for the console application.
//
#include "stdafx.h"
#include <iostream>

extern "C" {
#include <libavutil/opt.h>
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavutil/avutil.h>
#include <libavutil/channel_layout.h>
#include <libavutil/common.h>
#include <libavutil/imgutils.h>
#include <libavutil/mathematics.h>
#include <libavutil/samplefmt.h>
#include <libavutil/pixdesc.h>
#include <libswscale/swscale.h>

}
bool faceDetect(AVFrame *frame)
{
/*...*/
return true;
}
int main(int argc, char **argv)
{
int64_t videoPts = 0, audioPts = 0;
int samples_count = 0;
AVFormatContext *ifmt_ctx = NULL, *ofmt_ctx = NULL;
AVOutputFormat *ofmt = NULL;
AVPacket pkt;
AVFrame *frame = NULL;
int videoindex = -1; int audioindex = -1;
double videoTime = DBL_MAX;
const char *in_filename, *out_filename;
int ret, i;
in_filename = "C:\\input.flv";//Input file name
out_filename = "C:\\output.avi";//Output file name
av_register_all();
//Open input file
if ((ret = avformat_open_input(&ifmt_ctx, in_filename, 0, 0)) < 0) {
fprintf(stderr, "Could not open input file '%s'", in_filename);
goto end;
}
//Find input streams
if ((ret = avformat_find_stream_info(ifmt_ctx, 0)) < 0) {
fprintf(stderr, "Failed to retrieve input stream information");
goto end;
}
//Retrive AV stream information
for (i = 0; i < ifmt_ctx->nb_streams; i++)
{
AVStream *stream;
AVCodecContext *codec_ctx;
stream = ifmt_ctx->streams[i];//Get current stream
codec_ctx = stream->codec;//Get current stream codec
if (codec_ctx->codec_type == AVMEDIA_TYPE_VIDEO)
{
videoindex = i;//video stream index
}
else if (codec_ctx->codec_type == AVMEDIA_TYPE_AUDIO)
{
audioindex = i;//audio stream index
}
if (videoindex == -1)//no video stream is found
{
printf("can't find video stream\n");
goto end;

}
}
av_dump_format(ifmt_ctx, 0, in_filename, 0);
//Configure output
avformat_alloc_output_context2(&ofmt_ctx, NULL, NULL, out_filename);
if (!ofmt_ctx) {
fprintf(stderr, "Could not create output context\n");
ret = AVERROR_UNKNOWN;
goto end;
}
ofmt = ofmt_ctx->oformat;
//Configure output streams
for (i = 0; i < ifmt_ctx->nb_streams; i++) {//Traversal input streams
AVStream *in_stream = ifmt_ctx->streams[i];//Get current stream
AVStream *out_stream = avformat_new_stream(ofmt_ctx, in_stream->codec->codec);//Create a corresponding output stream
if (!out_stream) {
fprintf(stderr, "Failed allocating output stream\n");
ret = AVERROR_UNKNOWN;
goto end;
}
//Copy codec from current input stream to corresponding output stream
ret = avcodec_copy_context(out_stream->codec, in_stream->codec);
if (ret < 0) {
fprintf(stderr, "Failed to copy context from input to output stream codec context\n");
goto end;
}
if (i == videoindex)//Video stream
{
if (out_stream->codec->codec_id == AV_CODEC_ID_H264)
{
out_stream->codec->me_range = 16;
out_stream->codec->max_qdiff = 4;
out_stream->codec->qmin = 10;
out_stream->codec->qmax = 51;
out_stream->codec->qcompress = 1;

}
}
AVCodecContext *codec_ctx = out_stream->codec;
if (codec_ctx->codec_type == AVMEDIA_TYPE_VIDEO
|| codec_ctx->codec_type == AVMEDIA_TYPE_AUDIO) {
//Find codec encoder
AVCodec *encoder = avcodec_find_encoder(codec_ctx->codec_id);
if (!encoder) {
av_log(NULL, AV_LOG_FATAL, "Necessary encoder not found\n");
ret = AVERROR_INVALIDDATA;
goto end;
}
//Open encoder
ret = avcodec_open2(codec_ctx, encoder, NULL);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Cannot open video encoder for stream #%u\n", i);
goto end;
}
out_stream->codec->codec_tag = 0;
if (ofmt_ctx->oformat->flags & AVFMT_GLOBALHEADER)
out_stream->codec->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
}
//Open the decoder for input stream
codec_ctx = in_stream->codec;
if (codec_ctx->codec_type == AVMEDIA_TYPE_VIDEO
|| codec_ctx->codec_type == AVMEDIA_TYPE_AUDIO) {
ret = avcodec_open2(codec_ctx,
avcodec_find_decoder(codec_ctx->codec_id), NULL);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Failed to open decoder for stream #%u\n", i);
}
}
}
av_dump_format(ofmt_ctx, 0, out_filename, 1);
//Open output file for writing
if (!(ofmt->flags & AVFMT_NOFILE)) {
ret = avio_open(&ofmt_ctx->pb, out_filename, AVIO_FLAG_WRITE);
if (ret < 0) {
fprintf(stderr, "Could not open output file '%s'", out_filename);
goto end;
}
}

//Write video header
ret = avformat_write_header(ofmt_ctx, NULL);
if (ret < 0) {
fprintf(stderr, "Error occurred when opening output file\n");
goto end;
}
//Write frames in a loop
while (1) {
AVStream *in_stream, *out_stream;
//Read one frame from the input file
ret = av_read_frame(ifmt_ctx, &pkt);
if (ret < 0)
break;
in_stream = ifmt_ctx->streams[pkt.stream_index];//Get current input stream
out_stream = ofmt_ctx->streams[pkt.stream_index];//Get current output stream
if (pkt.stream_index == videoindex)//video frame
{
int got_frame;
frame = av_frame_alloc();
if (!frame) {
ret = AVERROR(ENOMEM);
break;
}
//Readjust packet timestamp for decoding
av_packet_rescale_ts(&pkt,
in_stream->time_base,
in_stream->codec->time_base);
//Decode video frame
int len = avcodec_decode_video2(in_stream->codec, frame, &got_frame, &pkt);
if (len < 0)
{
av_frame_free(&frame);
av_log(NULL, AV_LOG_ERROR, "Decoding failed\n");
break;
}
if (got_frame)//Got a decoded video frame
{
int64_t pts = av_frame_get_best_effort_timestamp(frame);
//determine if the frame image contains human face
bool result = faceDetect(frame);
if (result) //face contained
{
videoTime = pts* av_q2d(out_stream->time_base);
frame->pts = videoPts++;//Set pts of video frame
AVPacket enc_pkt;
av_log(NULL, AV_LOG_INFO, "Encoding video frame\n");
//Create packet for encoding
enc_pkt.data = NULL;
enc_pkt.size = 0;
av_init_packet(&enc_pkt);
//Encoding frame
ret = avcodec_encode_video2(out_stream->codec, &enc_pkt,
frame, &got_frame);
av_frame_free(&frame);
if (!(got_frame))
ret = 0;
/* Configure encoding properties */
enc_pkt.stream_index = videoindex;
av_packet_rescale_ts(&enc_pkt,
out_stream->codec->time_base,
out_stream->time_base);
av_log(NULL, AV_LOG_DEBUG, "Muxing frame\n");
/* Write encoded frame */
ret = av_interleaved_write_frame(ofmt_ctx, &enc_pkt);
if (ret < 0)
break;
}
else //no face contained
{
//Set the videoTime as maximum double value,
//making the corresponding audio frame not been processed
if (videoTime < DBL_MAX)
videoTime = DBL_MAX;
}

}
else
{
av_frame_free(&frame);
}
}
else//Audio frame
{
//Get current frame time
double audioTime = pkt.pts * av_q2d(in_stream->time_base);
if (audioTime >= videoTime)
{//The current frame should be written into output file
int got_frame;
frame = av_frame_alloc();
if (!frame) {
ret = AVERROR(ENOMEM);
break;
}
//Readjust packet timestamp for decoding
av_packet_rescale_ts(&pkt,
in_stream->time_base,
in_stream->codec->time_base);
//Decode audio frame
int len = avcodec_decode_audio4(in_stream->codec, frame, &got_frame, &pkt);
if (len < 0)
{
av_frame_free(&frame);
av_log(NULL, AV_LOG_ERROR, "Decoding failed\n");
break;
}
if (got_frame)//Got a decoded audio frame
{
//Set pts of audio frame
frame->pts = audioPts;
audioPts += frame->nb_samples;
AVPacket enc_pkt;
av_log(NULL, AV_LOG_INFO, "Encoding audio frame");
//Create packet for encoding
enc_pkt.data = NULL;
enc_pkt.size = 0;
av_init_packet(&enc_pkt);
//Encode audio frame
ret = avcodec_encode_audio2(out_stream->codec, &enc_pkt,
frame, &got_frame);
av_frame_free(&frame);
if (!(got_frame))
ret = 0;
/* Configure encoding properties */
enc_pkt.stream_index = audioindex;
av_packet_rescale_ts(&enc_pkt,
out_stream->codec->time_base,
out_stream->time_base);
av_log(NULL, AV_LOG_DEBUG, "Muxing frame\n");
/* Write encoded frame */
ret = av_interleaved_write_frame(ofmt_ctx, &enc_pkt);
if (ret < 0)
break;
}
else //Shouldn't be written
{
av_frame_free(&frame);
}
}
}
av_packet_unref(&pkt);
}
//Write video trailer
av_write_trailer(ofmt_ctx);
end://Clean up
av_log(NULL, AV_LOG_INFO, "Clean up\n");
av_frame_free(&frame);
for (i = 0; i < ifmt_ctx->nb_streams; i++) {
avcodec_close(ifmt_ctx->streams[i]->codec);
if (ofmt_ctx && ofmt_ctx->nb_streams > i && ofmt_ctx->streams[i] && ofmt_ctx->streams[i]->codec)
avcodec_close(ofmt_ctx->streams[i]->codec);
}
avformat_close_input(&ifmt_ctx);
/* Close output file */
if (ofmt_ctx && !(ofmt_ctx->oformat->flags & AVFMT_NOFILE))
avio_closep(&ofmt_ctx->pb);
avformat_free_context(ofmt_ctx);
if (ret < 0 && ret != AVERROR_EOF) {
char buf[256];
av_strerror(ret, buf, sizeof(buf));
av_log(NULL, AV_LOG_ERROR, "Error occurred:%s\n", buf);
system("Pause");
return 1;
}
//Program end
printf("The End.\n");
system("Pause");
return 0;
}

最佳答案

在原始代码中,我是这样设置pts的。

 frame->pts = videoPts++;//Set pts of video frame

这意味着我假设每个视频帧之间的pts增量为1。但实际上并非如此,当我在 Debug模式下检查输入文件的视频流时,发现它的pts增量为2。经过一番观察,我发现解码帧的pkt_duration属性可能代表输入视频流中的pts增量,因为它与 pts 增量具有相同的值。所以我将上面的代码改为

frame->pts = videoPts += frame->pkt_duration;

当我测试修改后的代码时,视频流和音频流不再不同步。到目前为止,我无法确保 pkt_duration 和 pts 增量实际上是同一件事,但在我的情况下似乎是这样。无论如何,使用 pkt_duration 解决了我的不同步问题。

关于c - FFMPEG:将视频的一部分写入新文件时 AV 不同步,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/43064357/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com