video - How to set the pts and dts of an AVPacket from RTP timestamps while muxing a VP8 RTP stream to webm using ffmpeg libavformat?


I am writing a video-only webm file using the ffmpeg libavformat library. I receive a VP8-encoded RTP stream on my server. I have successfully depacketized the RTP byte stream (from the RTP payloads) into individual frames and constructed an AVPacket from each one. I am NOT re-encoding the payload to VP8 here, since it is already VP8-encoded.

I am writing each AVPacket to the file with av_interleaved_write_frame(). Although I do get a webm file as output, it does not play at all. When I inspect the file with mkvtoolnix's "mkvinfo" command, I see the following information:

+ EBML head
|+ EBML version: 1
|+ EBML read version: 1
|+ EBML maximum ID length: 4
|+ EBML maximum size length: 8
|+ Doc type: webm
|+ Doc type version: 2
|+ Doc type read version: 2
+ Segment, size 2142500
|+ Seek head (subentries will be skipped)
|+ EbmlVoid (size: 170)
|+ Segment information
| + Timestamp scale: 1000000
| + Multiplexing application: Lavf58.0.100
| + Writing application: Lavf58.0.100
| + Duration: 78918744.480s (21921:52:24.480)
|+ Segment tracks
| + A track
| + Track number: 1 (track ID for mkvmerge & mkvextract: 0)
| + Track UID: 1
| + Lacing flag: 0
| + Name: Video Track
| + Language: eng
| + Codec ID: V_VP8
| + Track type: video
| + Default duration: 1.000ms (1000.000 frames/fields per second for a video track)
| + Video track
| + Pixel width: 640
| + Pixel height: 480
|+ Tags
| + Tag
| + Targets
| + Simple
| + Name: ENCODER
| + String: Lavf58.0.100
| + Tag
| + Targets
| + TrackUID: 1
| + Simple
| + Name: DURATION
| + String: 21921:52:24.4800000
|+ Cluster

As we can see, the duration of the stream is absurdly long (my actual stream duration should be around 8-10 seconds). The frame rate in the track information is also not what I set: I configured 25 fps, yet mkvinfo reports a default duration of 1.000 ms (1000 fps).

I am applying av_rescale_q(rtpTimeStamp, codec_timebase, stream_timebase) and setting the rescaled rtpTimeStamp as both the pts and dts values. My guess is that the way I am setting pts and dts is wrong. Please help me set the pts and dts values on the AVPacket correctly, so that I get a playable webm file with the right meta information.
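For reference, here is a minimal sketch of the conversion I suspect is actually required, assuming VP8 over RTP uses a 90 kHz timestamp clock (RFC 7741) and that RTP timestamps start at a random 32-bit offset, so they must be rebased against the first received frame before rescaling. (The numbers above are consistent with the raw values passing straight through: with codec time_base {1,25} and stream time_base {1,1000}, av_rescale_q multiplies by 40, and the reported 78,918,744,480 ms of duration divided by 40 is about 1.97e9, a plausible raw 32-bit RTP timestamp.) The rtp_ts_to_pts helper and first_rtp_ts below are illustrative, not part of my code:

#include <libavformat/avformat.h>

/* 90 kHz RTP clock for VP8 video (RFC 7741) */
static const AVRational RTP_VIDEO_TIME_BASE = {1, 90000};

/* Hypothetical helper: rebase a raw RTP timestamp against the first one
 * received, then rescale the elapsed 90 kHz ticks into the muxer stream's
 * time base. The unsigned subtraction also handles 32-bit wrap-around. */
static int64_t rtp_ts_to_pts(uint32_t rtp_ts, uint32_t first_rtp_ts, const AVStream *st)
{
    uint32_t elapsed = rtp_ts - first_rtp_ts;
    return av_rescale_q(elapsed, RTP_VIDEO_TIME_BASE, st->time_base);
}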

EDIT:

Below is the code I call to initialize the library:
#define STREAM_FRAME_RATE 25
#define STREAM_PIX_FMT AV_PIX_FMT_YUV420P

typedef struct OutputStream {
    AVStream *st;
    AVCodecContext *enc;
    AVFrame *frame;
} OutputStream;

typedef struct WebMWriter {
    OutputStream *audioStream, *videoStream;
    AVFormatContext *ctx;
    AVOutputFormat *outfmt;
    AVCodec *audioCodec, *videoCodec;
} WebMWriter;

static OutputStream audioStream = { 0 }, videoStream = { 0 };

WebMWriter *init(char *filename)
{
    av_register_all();

    AVFormatContext *ctx = NULL;
    AVCodec *audioCodec = NULL, *videoCodec = NULL;
    const char *fmt_name = NULL;
    const char *file_name = filename;

    int alloc_status = avformat_alloc_output_context2(&ctx, NULL, fmt_name, file_name);
    if (!ctx)
        return NULL;

    AVOutputFormat *fmt = (*ctx).oformat;

    AVDictionary *video_opt = NULL;
    av_dict_set(&video_opt, "language", "eng", 0);
    av_dict_set(&video_opt, "title", "Video Track", 0);

    if (fmt->video_codec != AV_CODEC_ID_NONE) {
        addStream(&videoStream, ctx, &videoCodec, AV_CODEC_ID_VP8, video_opt);
    }

    if (videoStream.st)
        openVideo1(&videoStream, videoCodec, NULL);

    av_dump_format(ctx, 0, file_name, 1);

    int ret = -1;
    /* open the output file, if needed */
    if (!(fmt->flags & AVFMT_NOFILE)) {
        ret = avio_open(&ctx->pb, file_name, AVIO_FLAG_WRITE);
        if (ret < 0) {
            printf("Could not open '%s': %s\n", file_name, av_err2str(ret));
            return NULL;
        }
    }

    /* Write the stream header, if any. */
    AVDictionary *format_opt = NULL;
    ret = avformat_write_header(ctx, &format_opt);
    if (ret < 0) {
        fprintf(stderr, "Error occurred when opening output file: %s\n",
                av_err2str(ret));
        return NULL;
    }

    WebMWriter *webmWriter = malloc(sizeof(struct WebMWriter));
    webmWriter->ctx = ctx;
    webmWriter->outfmt = fmt;
    webmWriter->audioStream = &audioStream;
    webmWriter->videoStream = &videoStream;
    webmWriter->videoCodec = videoCodec;

    return webmWriter;
}

Below is the openVideo1() method:
void openVideo1(OutputStream *out_st, AVCodec *codec, AVDictionary *opt_arg)
{
    AVCodecContext *codec_ctx = out_st->enc;
    int ret = -1;
    AVDictionary *opt = NULL;

    if (opt_arg != NULL) {
        av_dict_copy(&opt, opt_arg, 0);
        ret = avcodec_open2(codec_ctx, codec, &opt);
    } else {
        ret = avcodec_open2(codec_ctx, codec, NULL);
    }

    /* copy the stream parameters to the muxer */
    ret = avcodec_parameters_from_context(out_st->st->codecpar, codec_ctx);
    if (ret < 0) {
        printf("Could not copy the stream parameters\n");
        exit(1);
    }
}

Below is the addStream() method:
void addStream(OutputStream *out_st, AVFormatContext *ctx, AVCodec **cdc, enum AVCodecID codecId, AVDictionary *opt_arg)
{
    (*cdc) = avcodec_find_encoder(codecId);
    if (!(*cdc)) {
        exit(1);
    }

    /* if we were passing a NULL AVCodec here, the stream's AVCodecContext
     * would not be allocated and we would have to do it explicitly */
    AVStream *st = avformat_new_stream(ctx, *cdc);
    if (!st) {
        exit(1);
    }

    out_st->st = st;
    st->id = ctx->nb_streams - 1;

    AVDictionary *opt = NULL;
    av_dict_copy(&opt, opt_arg, 0);
    st->metadata = opt;

    AVCodecContext *codec_ctx = st->codec;
    if (!codec_ctx) {
        fprintf(stderr, "Could not alloc an encoding context\n");
        exit(1);
    }
    out_st->enc = codec_ctx;

    codec_ctx->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL;

    switch ((*cdc)->type) {
    case AVMEDIA_TYPE_AUDIO:
        codec_ctx->codec_id = codecId;
        codec_ctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
        codec_ctx->bit_rate = 64000;
        codec_ctx->sample_rate = 48000;
        codec_ctx->channels = 2;    // or 1
        codec_ctx->channel_layout = AV_CH_LAYOUT_STEREO;
        codec_ctx->codec_type = AVMEDIA_TYPE_AUDIO;
        codec_ctx->time_base = (AVRational){1, STREAM_FRAME_RATE};
        break;

    case AVMEDIA_TYPE_VIDEO:
        codec_ctx->codec_id = codecId;
        codec_ctx->bit_rate = 90000;
        codec_ctx->width = 640;
        codec_ctx->height = 480;

        codec_ctx->time_base = (AVRational){1, STREAM_FRAME_RATE};
        codec_ctx->gop_size = 12;
        codec_ctx->pix_fmt = STREAM_PIX_FMT;
        codec_ctx->codec_type = AVMEDIA_TYPE_VIDEO;
        break;

    default:
        break;
    }

    /* Some formats want stream headers to be separate. */
    if (ctx->oformat->flags & AVFMT_GLOBALHEADER)
        codec_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
}

Below is the code I call to write one frame of data to the file:
int writeVideoStream(AVFormatContext *ctx, AVStream *st, uint8_t *data, int size, long frameTimeStamp, int isKeyFrame, AVCodecContext *codec_ctx)
{
    AVRational rat = st->time_base;
    AVPacket pkt = {0};
    av_init_packet(&pkt);

    void *opaque = NULL;
    int flags = AV_BUFFER_FLAG_READONLY;
    AVBufferRef *bufferRef = av_buffer_create(data, size, NULL, opaque, flags);

    pkt.buf = bufferRef;
    pkt.data = data;
    pkt.size = size;
    pkt.stream_index = st->index;

    pkt.pts = pkt.dts = frameTimeStamp;
    pkt.pts = av_rescale_q(pkt.pts, codec_ctx->time_base, st->time_base);
    pkt.dts = av_rescale_q(pkt.dts, codec_ctx->time_base, st->time_base);

    if (isKeyFrame == 1)
        pkt.flags |= AV_PKT_FLAG_KEY;

    int ret = av_interleaved_write_frame(ctx, &pkt);
    return ret;
}

NOTE: "frameTimeStamp" here is the RTP timestamp taken from the RTP packets of that frame.
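Based on the sketch earlier, the two av_rescale_q calls above could be replaced by something like the following (using the hypothetical rtp_ts_to_pts helper and first_rtp_ts variable from that sketch; 3600 ticks is one 25 fps frame interval on a 90 kHz clock):

/* Sketch: treat frameTimeStamp as 90 kHz RTP ticks, not 1/25 s ticks */
pkt.pts = pkt.dts = rtp_ts_to_pts((uint32_t)frameTimeStamp, first_rtp_ts, st);
pkt.duration = av_rescale_q(3600, (AVRational){1, 90000}, st->time_base);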

EDIT 2.0:

My updated addStream() method with the codecpar changes:
void addStream(OutputStream *out_st, AVFormatContext *ctx, AVCodec **cdc, enum AVCodecID codecId, AVDictionary *opt_arg)
{
    (*cdc) = avcodec_find_encoder(codecId);
    if (!(*cdc)) {
        printf("@@@@@ couldnt find codec \n");
        exit(1);
    }

    AVStream *st = avformat_new_stream(ctx, *cdc);
    if (!st) {
        printf("@@@@@ couldnt init stream\n");
        exit(1);
    }

    out_st->st = st;
    st->id = ctx->nb_streams - 1;
    AVCodecParameters *codecpars = st->codecpar;
    codecpars->codec_id = codecId;
    codecpars->codec_type = (*cdc)->type;

    AVDictionary *opt = NULL;
    av_dict_copy(&opt, opt_arg, 0);
    st->metadata = opt;
    //av_dict_free(&opt);

    AVCodecContext *codec_ctx = st->codec;
    if (!codec_ctx) {
        fprintf(stderr, "Could not alloc an encoding context\n");
        exit(1);
    }
    out_st->enc = codec_ctx;

    //since opus is an experimental codec
    //codec_ctx->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL;

    switch ((*cdc)->type) {
    case AVMEDIA_TYPE_AUDIO:
        codec_ctx->codec_id = codecId;
        codec_ctx->sample_fmt = AV_SAMPLE_FMT_FLTP;   // or AV_SAMPLE_FMT_U8 / AV_SAMPLE_FMT_S16
        codec_ctx->bit_rate = 64000;
        codec_ctx->sample_rate = 48000;
        codec_ctx->channels = 2;                      // or 1
        codec_ctx->channel_layout = AV_CH_LAYOUT_STEREO;  // or AV_CH_LAYOUT_MONO
        codec_ctx->codec_type = AVMEDIA_TYPE_AUDIO;
        codec_ctx->time_base = (AVRational){1, STREAM_FRAME_RATE};

        codecpars->format = codec_ctx->sample_fmt;
        codecpars->channels = codec_ctx->channels;
        codecpars->sample_rate = codec_ctx->sample_rate;
        break;

    case AVMEDIA_TYPE_VIDEO:
        codec_ctx->codec_id = codecId;
        codec_ctx->bit_rate = 90000;
        codec_ctx->width = 640;
        codec_ctx->height = 480;

        codec_ctx->time_base = (AVRational){1, STREAM_FRAME_RATE};
        codec_ctx->gop_size = 12;
        codec_ctx->pix_fmt = STREAM_PIX_FMT;
        //codec_ctx->max_b_frames = 1;
        codec_ctx->codec_type = AVMEDIA_TYPE_VIDEO;
        codec_ctx->framerate = av_inv_q(codec_ctx->time_base);
        st->avg_frame_rate = codec_ctx->framerate;    //(AVRational){25000, 1000}

        codecpars->format = codec_ctx->pix_fmt;
        codecpars->width = codec_ctx->width;
        codecpars->height = codec_ctx->height;
        codecpars->sample_aspect_ratio = (AVRational){codec_ctx->width, codec_ctx->height};
        break;

    default:
        break;
    }
    codecpars->bit_rate = codec_ctx->bit_rate;

    int ret = avcodec_parameters_to_context(codec_ctx, codecpars);
    if (ret < 0) {
        printf("Could not copy the stream parameters\n");
        exit(1);
    }

    /* Some formats want stream headers to be separate. */
    if (ctx->oformat->flags & AVFMT_GLOBALHEADER)
        codec_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
}

Best Answer

I think the problem is in how the pts/dts are calculated. Compute the timestamps manually with the formula below first and confirm that it works; after that you can switch to av_rescale_q.

This is the formula I have tested (for raw (yuv) output):

int64_t frameTime;
int64_t frameDuration;

frameDuration = video_st->time_base.den / video_fps; // i.e. 25
frameTime = frame_count * frameDuration;
pkt->pts = frameTime / video_st->time_base.num;
pkt->duration = frameDuration;

pkt->dts = pkt->pts;
pkt->stream_index = video_st->index;

Use this before av_interleaved_write_frame.
Note: frame_count here is a counter that is incremented after each video frame is written out (with av_interleaved_write_frame).
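As a worked example (assuming webm's typical stream time_base of {1, 1000} and 25 fps, so frameDuration = 1000 / 25 = 40: frame 0 gets pts 0, frame 1 gets pts 40, and a 10-second clip ends near pts 10000), a minimal write loop might look like this; have_next_frame() and next_frame_packet() are hypothetical stand-ins for your frame source:

int64_t frame_count = 0;
int64_t frameDuration = video_st->time_base.den / 25;  /* 40 ticks with time_base {1,1000} */

while (have_next_frame()) {                 /* hypothetical frame source */
    AVPacket *pkt = next_frame_packet();    /* hypothetical: returns a filled packet */
    pkt->pts = frame_count * frameDuration / video_st->time_base.num;
    pkt->dts = pkt->pts;
    pkt->duration = frameDuration;
    pkt->stream_index = video_st->index;
    av_interleaved_write_frame(ctx, pkt);
    frame_count++;
}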

On "video - How to set the pts and dts of an AVPacket from RTP timestamps while muxing a VP8 RTP stream to webm using ffmpeg libavformat?", a similar question can be found on Stack Overflow: https://stackoverflow.com/questions/48440670/
