gpt4 book ai didi

c++ - 封装 H.264 的 C/C++ FLV 多路复用器未按预期工作

转载 作者:塔克拉玛干 更新时间:2023-11-03 07:02:36 25 4
gpt4 key购买 nike

我正在尝试制作 FLV 混合器。我开始测试从我的相机 (c920) 捕获的 h264 流。 .flv 文件编码后无法正常播放。

首先,我尝试在 h264 中查找 NAL,搜索模式 0x00 0x00 0x00 0x01...但是我在互联网上发现有两种模式可以找到 NAL...所以我实现了搜索 0x00 0x00 0x01 和其他...

在第一次测试中,我发现很少有 NAL 以四个字节开头,但是在更改代码以搜索 3 字节模式后,我发现了很多 NAL...

我在互联网上找到的参考资料显示,示例代码只用少数几种 NAL 类型来封装 FLV 文件;但是在改为检测 3 字节起始码之后,程序发现了很多 NAL,我不知道应该如何把它们写入 FLV 流。

一些代码:-)

我正在用 C++ 实现,所以我有类。

FLV 写入器(FLVWritter):

// Accumulates FLV fields (big-endian) in memory and flushes them to a file
// descriptor on request.
class FLVWritter{
public:
    // Bytes queued since the last flushToFD()/reset().
    std::vector<uint8_t> buffer;

    // Writes every queued byte to fd and then empties the buffer.
    // write() may accept fewer bytes than requested, so the remainder is
    // retried until everything is written or write() reports a failure.
    // On failure the error is reported on stderr and the queued bytes are dropped.
    void flushToFD(int fd) {
        if (buffer.empty())
            return;

        size_t offset = 0;
        while (offset < buffer.size()) {
            ssize_t written = ::write(fd, &buffer[offset], buffer.size() - offset);
            if (written <= 0) {
                fprintf(stderr, "error to write flv data\n");
                break;
            }
            offset += static_cast<size_t>(written);
        }
        buffer.clear();
    }

    // Discards everything queued so far without writing it.
    void reset(){
        buffer.clear();
    }

    // Queues the 9-byte FLV file header followed by the 4-byte PreviousTagSize0.
    // Byte 4 carries the type flags: 0x04 = audio present, 0x01 = video present.
    void writeFLVFileHeader(bool haveAudio, bool haveVideo){
        uint8_t flv_header[13] = {
            0x46, 0x4c, 0x56, 0x01, 0x00,   // 'F' 'L' 'V', version 1, type flags
            0x00, 0x00, 0x00, 0x09,         // header size (9)
            0x00, 0x00, 0x00, 0x00          // PreviousTagSize0
        };

        uint8_t flags = 0x00;
        if (haveAudio)
            flags |= 0x04;
        if (haveVideo)
            flags |= 0x01;
        flv_header[4] = flags;

        buffer.insert(buffer.end(), flv_header, flv_header + 13);
    }

    // Queues one byte.
    void writeUInt8(uint8_t v) {
        buffer.push_back(v);
    }

    // Queues the low 16 bits of v, most significant byte first.
    void writeUInt16(uint32_t v){
        buffer.push_back(static_cast<uint8_t>(v >> 8));
        buffer.push_back(static_cast<uint8_t>(v));
    }

    // Queues the low 24 bits of v, most significant byte first.
    void writeUInt24(uint32_t v){
        buffer.push_back(static_cast<uint8_t>(v >> 16));
        buffer.push_back(static_cast<uint8_t>(v >> 8));
        buffer.push_back(static_cast<uint8_t>(v));
    }

    // Queues all 32 bits of v, most significant byte first.
    void writeUInt32(uint32_t v){
        buffer.push_back(static_cast<uint8_t>(v >> 24));
        buffer.push_back(static_cast<uint8_t>(v >> 16));
        buffer.push_back(static_cast<uint8_t>(v >> 8));
        buffer.push_back(static_cast<uint8_t>(v));
    }

    // Queues a timestamp in the FLV tag layout: the low 24 bits big-endian,
    // followed by the top 8 bits as the extension byte.
    void writeUInt32Timestamp(uint32_t v){
        buffer.push_back(static_cast<uint8_t>(v >> 16));
        buffer.push_back(static_cast<uint8_t>(v >> 8));
        buffer.push_back(static_cast<uint8_t>(v));
        buffer.push_back(static_cast<uint8_t>(v >> 24));
    }

};

h264 解析器:

// Splits an Annex-B H.264 byte stream into NAL units.
// Both start-code forms (00 00 01 and 00 00 00 01) are recognised; each NAL is
// handed, without its start code, to the virtual chunkDetectedH264().
class ParserH264 {

    // Bytes of the NAL currently being collected.
    std::vector<uint8_t> buffer;

    enum State{
        None,
        Data,
        NAL0,
        NAL1
    };
    State nalState; // start-code detector: None -> NAL0 (one 00) -> NAL1 (two or more 00)
    State writingState; // None until the first start code has been seen, Data afterwards

    // Called when a start code terminates the NAL held in `buffer`.
    // The zero bytes of that start code were already appended, so they are
    // removed here before the NAL is delivered.
    void finishChunk() {
        // the two 0x00 bytes that precede the 0x01
        for (int i = 0; i < 2 && !buffer.empty(); i++)
            buffer.pop_back();

        // With the 4-byte form (00 00 00 01) one more 0x00 belongs to the start
        // code. It is kept only when the byte before it is 0x03 (emulation
        // prevention). The size checks avoid indexing before the beginning of
        // the vector when start codes follow each other directly.
        if (!buffer.empty() && buffer.back() == 0x00 &&
            (buffer.size() < 2 || buffer[buffer.size()-2] != 0x03))
            buffer.pop_back();

        // Nothing between two start codes: there is no NAL to report.
        if (!buffer.empty())
            chunkDetectedH264(&buffer[0], static_cast<int>(buffer.size()));
        buffer.clear();
    }

    // Feeds one stream byte through the start-code detector.
    void putByte(uint8_t byte) {
        if (byte == 0x00 && nalState == None)
            nalState = NAL0;
        else if (byte == 0x00 && (nalState == NAL0 || nalState == NAL1) )
            nalState = NAL1;
        else if (byte == 0x01 && nalState == NAL1){
            // start code complete
            nalState = None;

            if (writingState == None){
                // first start code of the stream: begin collecting after it
                writingState = Data;
                return;
            } else if (writingState == Data){
                finishChunk();
                return;
            }
        } else
            nalState = None;

        if (writingState == Data){
            // payload byte (possibly the beginning of the next start code)
            buffer.push_back(byte);
        }
    }

public:

    ParserH264() {
        nalState = None;
        writingState = None;
    }

    virtual ~ParserH264(){
    }

    // Receives one NAL unit (start code removed). ibuffer is only valid during the call.
    virtual void chunkDetectedH264(const uint8_t* ibuffer, int size){

    }

    // Delivers the NAL still being collected (if any) and resets the parser.
    void endOfStreamH264() {
        if (buffer.empty())
            return;

        chunkDetectedH264(&buffer[0], static_cast<int>(buffer.size()));
        buffer.clear();
        writingState = None;
        nalState = None;
    }

    // Feeds `size` bytes of the stream; may be called repeatedly with
    // consecutive pieces of the same stream.
    void parseH264(const uint8_t* ibuffer, int size) {
        for(int i=0;i<size;i++){
            putByte(ibuffer[i]);
        }
    }
};

最后是主类:

// Writes the NAL units found by ParserH264 into an FLV file, one FLV video tag
// per NAL unit.
// NOTE(review): every written tag (including the sequence header) advances
// videoTimestamp_ms by 1000/30, i.e. the clock moves once per NAL, not once
// per picture.
class FLVFileWriter: public ParserH264 {

    // Last SPS NAL seen; combined with the next PPS into the AVC sequence header.
    std::vector<uint8_t> lastSPS;

    // Writes one slice NAL as an FLV video tag (AVC NALU packet) and advances the clock.
    // frameTypeAndCodec is 0x17 (key frame, AVC) or 0x27 (inter frame, AVC);
    // label is only used in the error message.
    void writeSliceTag(const uint8_t* ibuffer, int size, uint8_t frameTypeAndCodec, const char* label) {
        uint32_t bodyLength = size + 5 + 4;//flv VideoTagHeader + NALU length

        // flv tag header = 11 bytes
        mFLVWritter.writeUInt8(0x09);//tagtype video
        mFLVWritter.writeUInt24( bodyLength );//data len
        mFLVWritter.writeUInt32Timestamp( videoTimestamp_ms );//timestamp
        mFLVWritter.writeUInt24( 0 );//stream id 0

        // Message Body = VideoTagHeader(5) + NALLength(4) + NAL raw data
        mFLVWritter.writeUInt8(frameTypeAndCodec);
        mFLVWritter.writeUInt8(0x01);//avc NALU unit
        mFLVWritter.writeUInt24(0x00);//composition time
        mFLVWritter.writeUInt32(size);//nal length

        //nal raw data
        for(int i=0;i<size;i++)
            mFLVWritter.writeUInt8(ibuffer[i]);

        // previous tag size
        uint32_t currentSize = mFLVWritter.buffer.size();
        if (currentSize != bodyLength + 11 ){
            fprintf(stderr, "error to write flv video tag %s\n", label);
            exit(-1);
        }
        mFLVWritter.writeUInt32(currentSize);//data len
        mFLVWritter.flushToFD(fd);

        videoTimestamp_ms += 1000/30;
    }

    // Writes the AVC sequence header tag (AVCDecoderConfigurationRecord) built
    // from lastSPS and the given PPS, then advances the clock.
    void writeSequenceHeaderTag(const uint8_t* pps, int size) {
        // profile/compatibility/level are read from bytes 1..3 of the SPS
        if (lastSPS.size() < 4){
            fprintf(stderr, "PPS received without a valid SPS. Ignoring it.\n");
            return;
        }

        // both parameter sets are stored behind 16-bit length fields
        if ( lastSPS.size() > 0xffff ){
            fprintf(stderr, "SPS Greater than 64k. This muxer does not support it.\n");
            exit(-1);
        }
        if ( size > 0xffff ){
            fprintf(stderr, "PPS Greater than 64k. This muxer does not support it.\n");
            exit(-1);
        }

        uint32_t bodyLength = lastSPS.size() + size + 16;

        // flv tag header = 11 bytes
        mFLVWritter.writeUInt8(0x09);//tagtype video
        mFLVWritter.writeUInt24( bodyLength );//data len
        mFLVWritter.writeUInt32Timestamp( videoTimestamp_ms );//timestamp
        mFLVWritter.writeUInt24( 0 );//stream id 0

        // Message Body = 16 bytes + SPS bytes + PPS bytes
        //flv VideoTagHeader
        mFLVWritter.writeUInt8(0x17);//key frame, AVC 1:keyframe 7:h264
        mFLVWritter.writeUInt8(0x00);//avc sequence header
        mFLVWritter.writeUInt24( 0x00 );//composition time

        //flv VideoTagBody -- AVCDecoderConfigurationRecord
        mFLVWritter.writeUInt8(0x01);//configurationversion
        mFLVWritter.writeUInt8(lastSPS[1]);//avcprofileindication
        mFLVWritter.writeUInt8(lastSPS[2]);//profilecompatibilty
        mFLVWritter.writeUInt8(lastSPS[3]);//avclevelindication
        mFLVWritter.writeUInt8(0xFC | 0x03); //reserved + lengthsizeminusone
        mFLVWritter.writeUInt8(0xe0 | 0x01); // first reserved, second number of SPS

        mFLVWritter.writeUInt16( lastSPS.size() ); //sequence parameter set length
        //H264 sequence parameter set raw data
        for(size_t i=0;i<lastSPS.size();i++)
            mFLVWritter.writeUInt8(lastSPS[i]);

        mFLVWritter.writeUInt8(0x01); // number of PPS

        mFLVWritter.writeUInt16(size); //picture parameter set length
        //H264 picture parameter set raw data
        for(int i=0;i<size;i++)
            mFLVWritter.writeUInt8(pps[i]);

        // previous tag size
        uint32_t currentSize = mFLVWritter.buffer.size();
        if (currentSize != bodyLength + 11 ){
            fprintf(stderr, "error to write flv video tag NAL_TYPE_PPS\n");
            exit(-1);
        }
        mFLVWritter.writeUInt32(currentSize);//data len
        mFLVWritter.flushToFD(fd);

        videoTimestamp_ms += 1000/30;
    }

public:
    FLVWritter mFLVWritter;
    int fd;
    bool firstAudioWrite;
    uint32_t audioTimestamp_ms;
    uint32_t videoTimestamp_ms;

    // Creates/truncates `file` and writes the FLV file header.
    // Terminates the process when the file cannot be opened.
    FLVFileWriter ( const char* file ) {
        fd = open(file, O_WRONLY | O_CREAT | O_TRUNC , 0644 );
        if (fd < 0){
            fprintf(stderr,"error to create flv file\n");
            exit(-1);
        }

        //video only: no audio, have video
        mFLVWritter.writeFLVFileHeader(false, true);
        mFLVWritter.flushToFD(fd);

        firstAudioWrite = true;
        audioTimestamp_ms = 0;
        videoTimestamp_ms = 0;
    }

    // Flushes whatever is still queued and closes the file.
    virtual ~FLVFileWriter(){
        if (fd >= 0){
            mFLVWritter.flushToFD(fd);
            close(fd);
        }
    }

    // Handles one NAL unit delivered by ParserH264 (start code already removed).
    // SPS is stored, PPS triggers the sequence header, slices are written as
    // tags, SEI is ignored; any other NAL type terminates the process.
    void chunkDetectedH264(const uint8_t* ibuffer, int size) {
        printf("[debug] Detected NAL chunk size: %i\n",size);

        if (size <= 0){
            fprintf(stdout, " error On h264 chunk detection\n");
            return;
        }

        uint8_t nal_bit = ibuffer[0];
        uint8_t nal_type = (nal_bit & 0x1f);

        //0x67
        if ( nal_type == (NAL_TYPE_SPS) ) {

            fprintf(stdout, " processing: 0x%x (SPS)\n",nal_bit);

            //kept until the PPS arrives, probably the next NAL detection
            lastSPS.assign(ibuffer, ibuffer + size);
        }
        //0x68
        else if ( nal_type == (NAL_TYPE_PPS) ) {

            fprintf(stdout, " processing: 0x%x (PPS)\n",nal_bit);

            writeSequenceHeaderTag(ibuffer, size);
        }
        //0x65
        else if ( nal_type == (NAL_TYPE_CSIDRP) ) {

            fprintf(stdout, " processing: 0x%x (0x65)\n",nal_bit);

            writeSliceTag(ibuffer, size, 0x17, "NAL_TYPE_CSIDRP");//key frame, AVC
        }
        //0x61
        else if ( nal_type == (NAL_TYPE_CSNIDRP) ) {

            fprintf(stdout, " processing: 0x%x (0x61)\n",nal_bit);

            writeSliceTag(ibuffer, size, 0x27, "NAL_TYPE_CSNIDRP");//inter frame, AVC
        }

        else if (nal_type == (NAL_TYPE_SEI)) {
            fprintf(stdout, " ignoring SEI bit: 0x%x type: 0x%x\n",nal_bit, nal_type);

        } else {
            // nal_bit type not implemented...
            fprintf(stdout, "Error: unknown NAL bit: 0x%x type: 0x%x\n",nal_bit, nal_type);
            exit(-1);
        }
    }

};

要在 main 中使用这些类,我们可以这样写:

// Set by signal_handler() and polled by the read loop.
// volatile sig_atomic_t is the integer type that may be written from an
// asynchronous signal handler.
volatile sig_atomic_t exit_requested = 0;

// Requests a clean shutdown; installed for SIGINT, SIGTERM and SIGQUIT.
void signal_handler(int signal) {
    (void)signal;
    exit_requested = 1;
}

// Reads an Annex-B H.264 stream from stdin and writes it to out.flv until
// end of input, a read error, or SIGINT/SIGTERM/SIGQUIT.
int main(int argc, char* argv[]) {
    (void)argc;
    (void)argv;

    int fd_stdin = fileno(stdin);

    signal(SIGINT, signal_handler);
    signal(SIGTERM, signal_handler);
    signal(SIGQUIT, signal_handler);

    unsigned char buffer[65536];

    FLVFileWriter flv("out.flv");

    while (!exit_requested) {

        ssize_t readedSize = read(fd_stdin, buffer, sizeof(buffer));
        // 0 is end of input; a negative value is an error (including a read
        // interrupted by one of the signals above). Either way stop reading,
        // otherwise a persistent error would make this loop spin forever.
        if (readedSize <= 0)
            break;

        flv.parseH264(buffer, static_cast<int>(readedSize));
    }

    // deliver the NAL that was still being collected
    flv.endOfStreamH264();

    signal(SIGINT, SIG_DFL);
    signal(SIGTERM, SIG_DFL);
    signal(SIGQUIT, SIG_DFL);

    return 0;
}

编译上面的程序我们可以执行如下:

cat test.h264 | ./flv

然后我明白了:

video opened in vlc

我从命令行使用 ffmpeg 来检查我的源 h264 流文件是否已损坏。所以我运行了以下命令:

cat test.h264 | ffmpeg -i - -c:v copy out2.flv

使用 ffmpeg 结果是可以的:

video opened in vlc

我把我测试的视频放上完整源码here .

The h264 file I used for the test .

最佳答案

WIP(进行中)

但我发现了很多新信息:-)。

第一:FLV 容器无法从 h264 中一个一个地打包 NAL 缓冲区。

其次:只有当有新帧可用时,才需要增加时间戳。

我遇到的绿屏问题,与把 NAL 包拆散写入(同一帧的 NAL 被分到多个 FLV 标签中)以及时间戳处理不当有关。

困难的部分是检测流何时获得新帧。有几种方法提到here .

但是 h264 流在其结构中具有不同位大小的数据成员。

阅读位

ue(v) 数据类型。要读取此结构,需要先统计前导 0 位的个数,直到遇到第一个值为 1 的位;然后在这个 1 位之后,再读取与前导 0 个数相同的位数。

h264 header 中这种字段的位数是可变的。它采用指数哥伦布(Exp-Golomb)编码,其值按以下公式计算:(1 << leadingZeros) - 1 + readbits(leadingZeros)。

RBSP - 原始字节序列负载

我们不能直接读取流值,因为它可能包含仿真阻止字节。我们需要在读取 header 之前删除仿真防止字节。

代码

因此,为了缓解这个问题,我首先重写了 H264 解析器,如下所示:

// Values taken from an H.264 sequence parameter set (SPS).
struct sps_info{
    uint8_t profile_idc;
    uint8_t constraints;         // the constraint_set flags byte
    uint8_t level_idc;
    uint8_t log2_max_frame_num;  // number of bits of frame_num in a slice header

    bool set;                    // true once filled in by ParserH264::parseSPS()
};

// Splits an Annex-B H.264 byte stream into NAL units and offers static helpers
// for reading bit fields of a NAL once it has been converted to RBSP.
// Both start-code forms (00 00 01 and 00 00 00 01) are recognised; each NAL is
// handed, without its start code, to the virtual chunkDetectedH264().
class ParserH264 {
    // Bytes of the NAL currently being collected.
    std::vector<uint8_t> buffer;

    enum State{
        None,
        Data,
        NAL0,
        NAL1
    };
    State nalState; // start-code detector: None -> NAL0 (one 00) -> NAL1 (two or more 00)
    State writingState; // None until the first start code has been seen, Data afterwards

    // Called when a start code terminates the NAL held in `buffer`.
    // The zero bytes of that start code were already appended, so they are
    // removed here before the NAL is delivered.
    void finishChunk() {
        // the two 0x00 bytes that precede the 0x01
        for (int i = 0; i < 2 && !buffer.empty(); i++)
            buffer.pop_back();

        // With the 4-byte form (00 00 00 01) one more 0x00 belongs to the start
        // code. It is kept only when the byte before it is 0x03 (emulation
        // prevention). buffer.size() is unsigned, so the sizes are compared
        // explicitly instead of testing "size()-2 >= 0", which is always true.
        if (!buffer.empty() && buffer.back() == 0x00 &&
            (buffer.size() < 2 || buffer[buffer.size()-2] != 0x03))
            buffer.pop_back();

        // Nothing between two start codes: there is no NAL to report.
        if (!buffer.empty())
            chunkDetectedH264(buffer);

        buffer.clear();
    }

    // Feeds one stream byte through the start-code detector.
    void putByte(uint8_t byte) {
        if (byte == 0x00 && nalState == None)
            nalState = NAL0;
        else if (byte == 0x00 && (nalState == NAL0 || nalState == NAL1) )
            nalState = NAL1;
        else if (byte == 0x01 && nalState == NAL1){
            // start code complete
            nalState = None;

            if (writingState == None){
                // first start code of the stream: begin collecting after it
                writingState = Data;
                return;
            } else if (writingState == Data){
                finishChunk();
                return;
            }
        } else
            nalState = None;

        if (writingState == Data){
            // payload byte (possibly the beginning of the next start code)
            buffer.push_back(byte);
        }
    }

    // Counts the 0 bits from bit `start` up to (not including) the next 1 bit.
    // readbit() terminates the process if the data ends before a 1 bit is found.
    static inline int countLeadingZeroBits(int start, const uint8_t* data, int size){
        int bitPos = start;
        while (readbit(bitPos, data, size) == 0)
            bitPos++;
        return bitPos - start;
    }

    // True for the profiles whose SPS carries chroma_format_idc, the bit depths
    // and the optional scaling matrix before log2_max_frame_num_minus4.
    static inline bool hasChromaFormatFields(uint8_t profile_idc){
        switch (profile_idc) {
            case 100: case 110: case 122: case 244: case 44:
            case 83:  case 86:  case 118: case 128: case 138:
            case 139: case 134: case 135:
                return true;
            default:
                return false;
        }
    }

    // Skips one scaling_list() of `entries` coefficients starting at bitPos and
    // returns the bit position that follows it.
    static inline int skipScalingList(int bitPos, int entries, const uint8_t* data, int size){
        int lastScale = 8;
        int nextScale = 8;
        for (int j = 0; j < entries; j++){
            if (nextScale != 0){
                // delta_scale is se(v): odd code numbers are positive, even ones negative
                uint32_t codeNum = read_golomb_ue(bitPos, data, size);
                bitPos += bitsToSkip_ue(bitPos, data, size);
                int delta = (codeNum & 1) ? static_cast<int>((codeNum + 1) / 2)
                                          : -static_cast<int>(codeNum / 2);
                nextScale = (lastScale + delta + 256) % 256;
            }
            if (nextScale != 0)
                lastScale = nextScale;
        }
        return bitPos;
    }

public:

    ParserH264() {
        nalState = None;
        writingState = None;
    }

    virtual ~ParserH264(){
    }

    // Receives one NAL unit (start code removed). The bytes may still contain
    // emulation prevention bytes; use nal2RBSP() before reading bit fields.
    // `nal` is only valid during the call.
    virtual void chunkDetectedH264(const std::vector<uint8_t> &nal){

    }

    // Delivers the NAL still being collected (if any) and resets the parser.
    void endOfStreamH264() {
        if (buffer.empty())
            return;

        chunkDetectedH264(buffer);

        buffer.clear();
        writingState = None;
        nalState = None;
    }

    // Feeds `size` bytes of the stream; may be called repeatedly with
    // consecutive pieces of the same stream.
    void parseH264(const uint8_t* ibuffer, int size) {
        for(int i=0;i<size;i++){
            putByte(ibuffer[i]);
        }
    }


    // Returns bit number bitPos of data (bit 0 is the most significant bit of
    // data[0]). Terminates the process when bitPos lies beyond `size` bytes.
    static inline uint32_t readbit(int bitPos, const uint8_t* data, int size){
        int dataPosition = bitPos / 8;
        int bitPosition = 7 - bitPos % 8;
        if (dataPosition >= size){
            fprintf(stderr,"error to access bit...\n");
            exit(-1);
        }
        return (data[dataPosition] >> bitPosition) & 0x01;
    }

    // Returns how many bits the ue(v) field starting at bit `start` occupies.
    //
    // The number of leading 0 bits equals the number of bits that follow the 1 bit.
    //
    // Example: 01x 001xx 0001xxx 00001xxxx
    //
    // Fields longer than 32 bits are rejected (the process terminates).
    static inline int bitsToSkip_ue( int start, const uint8_t* data, int size){
        int leadingZeros = countLeadingZeroBits(start, data, size);
        int totalBits = leadingZeros + 1 + leadingZeros;
        if (totalBits > 32){
            fprintf(stderr,"bitsToSkip_ue length greated than 32\n");
            exit(-1);
        }
        return totalBits;
    }

    // Decodes the ue(v) (unsigned Exp-Golomb) field starting at bit `start`:
    // (1 << leadingZeros) - 1 + the leadingZeros bits that follow the 1 bit.
    static inline uint32_t read_golomb_ue(int start, const uint8_t* data, int size){
        int leadingZeros = countLeadingZeroBits(start, data, size);
        // more than 31 leading zeros cannot be represented in 32 bits
        if (leadingZeros > 31){
            fprintf(stderr,"read_golomb_ue length greated than 31\n");
            exit(-1);
        }
        uint32_t num = readbits(start + leadingZeros + 1, leadingZeros, data, size);
        num += (static_cast<uint32_t>(1) << leadingZeros) - 1;

        return num;
    }

    // Reads `length` (at most 32) bits starting at bitPos, most significant bit first.
    static inline uint32_t readbits(int bitPos, int length, const uint8_t* data, int size){
        if (length > 32){
            fprintf(stderr,"readbits length greated than 32\n");
            exit(-1);
        }
        uint32_t result = 0;
        for(int i=0;i<length;i++){
            result <<= 1;
            result = result | readbit(bitPos+i, data, size);
        }
        return result;
    }

    // Extracts profile, constraint flags, level and log2_max_frame_num from an
    // SPS given as RBSP (NAL header byte included, emulation bytes removed).
    // Terminates the process when the SPS is too short or a value is out of range.
    static inline sps_info parseSPS(const std::vector<uint8_t> &sps_rbsp) {
        if (sps_rbsp.size() < 4){
            fprintf(stderr,"parseSPS SPS shorter than 4 bytes\n");
            exit(-1);
        }
        const uint8_t *data = &sps_rbsp[0];
        const int size = static_cast<int>(sps_rbsp.size());

        sps_info result;

        result.profile_idc = sps_rbsp[1];
        result.constraints = sps_rbsp[2];
        result.level_idc = sps_rbsp[3];

        int bitPos = 8+24;//NAL bit + profile_idc (8bits) + constraints (8bits) + level_idc (8bits)
        bitPos += bitsToSkip_ue(bitPos, data, size);//seq_parameter_set_id (ue)

        // High-family profiles insert extra fields before log2_max_frame_num_minus4.
        if (hasChromaFormatFields(result.profile_idc)){
            uint32_t chroma_format_idc = read_golomb_ue(bitPos, data, size);
            bitPos += bitsToSkip_ue(bitPos, data, size);//chroma_format_idc (ue)
            if (chroma_format_idc == 3)
                bitPos += 1;//separate_colour_plane_flag
            bitPos += bitsToSkip_ue(bitPos, data, size);//bit_depth_luma_minus8 (ue)
            bitPos += bitsToSkip_ue(bitPos, data, size);//bit_depth_chroma_minus8 (ue)
            bitPos += 1;//qpprime_y_zero_transform_bypass_flag

            uint32_t seq_scaling_matrix_present_flag = readbit(bitPos, data, size);
            bitPos += 1;
            if (seq_scaling_matrix_present_flag){
                int listCount = (chroma_format_idc != 3) ? 8 : 12;
                for (int i = 0; i < listCount; i++){
                    uint32_t listPresent = readbit(bitPos, data, size);
                    bitPos += 1;
                    if (listPresent)
                        bitPos = skipScalingList(bitPos, (i < 6) ? 16 : 64, data, size);
                }
            }
        }

        uint32_t log2_max_frame_num_minus4 = read_golomb_ue(bitPos, data, size);

        if (log2_max_frame_num_minus4 > 12){
            fprintf(stderr,"parseSPS_log2_max_frame_num_minus4 value not in range [0-12] \n");
            exit(-1);
        }

        result.log2_max_frame_num = log2_max_frame_num_minus4 + 4;
        result.set = true;
        return result;
    }

    // Converts a NAL to its Raw Byte Sequence Payload (RBSP) by dropping the
    // emulation prevention byte of every 00 00 03 sequence.
    // maxSize limits how many input bytes are converted (useful when only the
    // beginning of a NAL is needed); values <= 0 or larger than the NAL mean
    // "the whole NAL". The previous content of *rbsp is replaced.
    static inline void nal2RBSP(const std::vector<uint8_t> &buffer,
                                std::vector<uint8_t> *rbsp,
                                int maxSize = -1){
        // never read beyond the NAL, whatever the caller asked for
        if (maxSize <= 0 || static_cast<size_t>(maxSize) > buffer.size())
            maxSize = static_cast<int>(buffer.size());

        rbsp->clear();
        rbsp->reserve(maxSize);

        int zeroRun = 0;                 // consecutive 0x00 bytes just copied
        bool afterEmulationByte = false; // previous byte was a dropped 0x03

        for(int i=0; i < maxSize ;i++){
            uint8_t byte = buffer[i];

            if (afterEmulationByte){
                // only 00, 01, 02 or 03 may follow an emulation prevention byte
                if ( byte != 0x00 && byte != 0x01 && byte != 0x02 && byte != 0x03 ){
                    fprintf(stdout, "H264 NAL emulation prevention pattern detected error (%u)\n", byte);
                    exit(-1);
                }
                afterEmulationByte = false;
            } else if (byte == 0x03 && zeroRun >= 2){
                // 00 00 03: drop the 03
                zeroRun = 0;
                afterEmulationByte = true;
                continue;
            }

            // A 0x00 right after a dropped 03 starts a new run, so sequences
            // such as 00 00 03 00 00 03 are unescaped completely.
            zeroRun = (byte == 0x00) ? zeroRun + 1 : 0;
            rbsp->push_back(byte);
        }
    }

};

我更新了 FLVWriter 以写入整个帧(具有 1 个或多个 NAL)并写入视频序列 header 。

代码:

// Accumulates FLV fields (big-endian) in memory, builds complete FLV video tags
// and flushes them to a file descriptor on request.
class FLVWritter{

    // Reports a malformed/oversized tag and terminates, like the other fatal
    // errors of this muxer.
    static void fatal(const char* message) {
        fprintf(stderr, "%s", message);
        exit(-1);
    }

public:
    // Bytes queued since the last flushToFD()/reset().
    std::vector<uint8_t> buffer;

    // Writes every queued byte to fd and then empties the buffer.
    // write() may accept fewer bytes than requested, so the remainder is
    // retried until everything is written or write() reports a failure.
    // On failure the error is reported on stderr and the queued bytes are dropped.
    void flushToFD(int fd) {
        if (buffer.empty())
            return;

        size_t offset = 0;
        while (offset < buffer.size()) {
            ssize_t written = ::write(fd, &buffer[offset], buffer.size() - offset);
            if (written <= 0) {
                fprintf(stderr, "error to write flv data\n");
                break;
            }
            offset += static_cast<size_t>(written);
        }
        buffer.clear();
    }

    // Discards everything queued so far without writing it.
    void reset(){
        buffer.clear();
    }

    // Queues the 9-byte FLV file header followed by the 4-byte PreviousTagSize0.
    // Byte 4 carries the type flags: 0x04 = audio present, 0x01 = video present.
    void writeFLVFileHeader(bool haveAudio, bool haveVideo){
        uint8_t flv_header[13] = {
            0x46, 0x4c, 0x56, 0x01, 0x00,   // 'F' 'L' 'V', version 1, type flags
            0x00, 0x00, 0x00, 0x09,         // header size (9)
            0x00, 0x00, 0x00, 0x00          // PreviousTagSize0
        };

        uint8_t flags = 0x00;
        if (haveAudio)
            flags |= 0x04;
        if (haveVideo)
            flags |= 0x01;
        flv_header[4] = flags;

        buffer.insert(buffer.end(), flv_header, flv_header + 13);
    }

    // Queues one byte.
    void writeUInt8(uint8_t v) {
        buffer.push_back(v);
    }

    // Queues the low 16 bits of v, most significant byte first.
    void writeUInt16(uint32_t v){
        buffer.push_back((v >> 8) & 0xff);
        buffer.push_back((v) & 0xff);
    }

    // Queues the low 24 bits of v, most significant byte first.
    void writeUInt24(uint32_t v){
        buffer.push_back((v >> 16) & 0xff);
        buffer.push_back((v >> 8) & 0xff);
        buffer.push_back((v) & 0xff);
    }

    // Queues all 32 bits of v, most significant byte first.
    void writeUInt32(uint32_t v){
        writeUInt32(v, &buffer);
    }

    // Appends all 32 bits of v, most significant byte first, to *data instead
    // of the internal buffer.
    void writeUInt32(uint32_t v, std::vector<uint8_t> *data){
        data->push_back((v >> 24) & 0xff);
        data->push_back((v >> 16) & 0xff);
        data->push_back((v >> 8) & 0xff);
        data->push_back((v) & 0xff);
    }

    // Queues a timestamp in the FLV tag layout: the low 24 bits big-endian,
    // followed by the top 8 bits as the extension byte.
    void writeUInt32Timestamp(uint32_t v){
        buffer.push_back((v >> 16) & 0xff);
        buffer.push_back((v >> 8) & 0xff);
        buffer.push_back((v) & 0xff);
        buffer.push_back((v >> 24) & 0xff);
    }

    // Queues the AVC sequence header tag (AVCDecoderConfigurationRecord with
    // one SPS and one PPS) at timestamp 0, followed by its PreviousTagSize.
    // sps/pps are the raw NAL units; profile, compatibility and level come
    // from spsinfo.
    void writeVideoSequenceHeader(const std::vector<uint8_t> &sps, const std::vector<uint8_t> &pps, const sps_info &spsinfo) {

        // both parameter sets are stored behind 16-bit length fields
        if (sps.size() > 0xffff || pps.size() > 0xffff)
            fatal("error writeVideoSequenceHeader: SPS/PPS greater than 64k\n");

        // Sizes are measured from here so the check below also holds when the
        // buffer was not empty on entry.
        const size_t tagStart = buffer.size();
        const uint32_t bodyLength = sps.size() + pps.size() + 16;

        //
        // flv tag header = 11 bytes
        //
        writeUInt8(0x09);//tagtype video
        writeUInt24( bodyLength );//data len
        writeUInt32Timestamp( 0 );//timestamp
        writeUInt24( 0 );//stream id 0

        //
        // Message Body = 16 bytes + SPS bytes + PPS bytes
        //
        //flv VideoTagHeader
        writeUInt8(0x17);//key frame, AVC 1:keyframe 7:h264
        writeUInt8(0x00);//avc sequence header
        writeUInt24( 0x00 );//composition time

        //flv VideoTagBody -- AVCDecoderConfigurationRecord
        writeUInt8(0x01);//configurationversion
        writeUInt8(spsinfo.profile_idc);//avcprofileindication
        writeUInt8(spsinfo.constraints);//profilecompatibilty
        writeUInt8(spsinfo.level_idc);//avclevelindication
        writeUInt8(0xFC | 0x03); //reserved (6 bits), NALU length size - 1 (2 bits)
        writeUInt8(0xe0 | 0x01); // first reserved, second number of SPS

        writeUInt16( sps.size() ); //sequence parameter set length
        //H264 sequence parameter set raw data
        buffer.insert(buffer.end(), sps.begin(), sps.end());

        writeUInt8(0x01); // number of PPS

        writeUInt16(pps.size()); //picture parameter set length
        //H264 picture parameter set raw data
        buffer.insert(buffer.end(), pps.begin(), pps.end());

        const size_t tagSize = buffer.size() - tagStart;
        if (tagSize != bodyLength + 11 )
            fatal("error writeVideoSequenceHeader\n");

        // previous tag size
        writeUInt32(tagSize);
    }

    // Queues one video tag holding a whole picture, followed by its PreviousTagSize.
    // data is the picture in AVCC form: every NAL preceded by its 32-bit length.
    void writeVideoFrame(const std::vector<uint8_t> &data, bool keyframe, uint32_t timestamp_ms, int streamID){

        // the tag's data size is a 24-bit field
        if (data.size() + 5 > 0xffffff)
            fatal("error writeVideoFrame: frame does not fit in one FLV tag\n");

        const size_t tagStart = buffer.size();
        const uint32_t bodyLength = data.size() + 5;

        writeUInt8(0x09);//tagtype video
        writeUInt24( bodyLength );//data len
        writeUInt32Timestamp( timestamp_ms );//timestamp
        writeUInt24( streamID );//stream id

        // high nibble: 1 = key frame, 2 = inter frame; low nibble: 7 = AVC (H264)
        writeUInt8(keyframe ? 0x17 : 0x27);

        writeUInt8(0x01);//avc NALU unit
        writeUInt24(0x00);//composition time

        buffer.insert(buffer.end(), data.begin(), data.end());

        const size_t tagSize = buffer.size() - tagStart;
        if (tagSize != bodyLength + 11 )
            fatal("error writeVideoFrame\n");

        // previous size
        writeUInt32(tagSize);

    }

    // Queues the AVC end-of-sequence tag, followed by its PreviousTagSize.
    void writeVideoEndOfStream(uint32_t timestamp_ms, int streamID){
        const size_t tagStart = buffer.size();

        writeUInt8(0x09);//tagtype video
        writeUInt24( 5 );//data len
        writeUInt32Timestamp( timestamp_ms );//timestamp
        writeUInt24( streamID );//stream id

        writeUInt8(0x17);//key frame, AVC 1:keyframe 2:inner frame 7:H264
        writeUInt8(0x02);//avc EOS
        writeUInt24(0x00);//composition time

        const size_t tagSize = buffer.size() - tagStart;
        if (tagSize != 5 + 11 )
            fatal("error writeVideoEOS\n");

        // previous size
        writeUInt32(tagSize);
    }

};

接下来我们需要从流中检测新帧。

我实现了其中的几种检测方法,包括基于 frame_num 的方法。

要读取 frame_num,我们需要在读取 frame_num 位之前跳过 3 个 ue(v) 结构。

现在是 h264 新的帧检测器类:

class H264NewFrameDetection {
std::vector<uint8_t> aux;

bool newFrameOnNextIDR;
uint32_t old_frame_num;
bool spsinfo_set;

bool firstFrame;
public:
uint32_t currentFrame;
bool newFrameFound;

H264NewFrameDetection() {
currentFrame = 0;
newFrameOnNextIDR = false;
old_frame_num = 0;
spsinfo_set = false;
firstFrame = true;
}

void analyseBufferForNewFrame(const std::vector<uint8_t> &nal, const sps_info &spsinfo){

uint8_t nal_bit = nal[0];
uint8_t nal_type = (nal_bit & 0x1f);

if ( nal_type == (NAL_TYPE_SPS) ||
nal_type == (NAL_TYPE_PPS) ||
nal_type == (NAL_TYPE_AUD) ||
nal_type == (NAL_TYPE_SEI) ||
(nal_type >= 14 && nal_type <= 18)
) {
newFrameOnNextIDR = true;
}

if ((nal_type == NAL_TYPE_CSIDRP ||
nal_type == NAL_TYPE_CSNIDRP)
&& spsinfo.set ){

aux.clear();
//(8 + 3*(32*2+1) + 16) = max header per NALU slice bits = 27.375 bytes
// 32 bytes + 8 (Possibility of Emulation in 32 bytes)
int RBSPMaxBytes = 32 + 8;
if (nal.size() < (32 + 8))
RBSPMaxBytes = nal.size();
ParserH264::nal2RBSP(nal, &aux, RBSPMaxBytes );
uint8_t * rbspBuffer = &aux[0];
uint32_t rbspBufferSize = aux.size();

uint32_t frame_num_index = 8;//start counting after the nal_bit
//first_mb_in_slice (ue)
frame_num_index += ParserH264::bitsToSkip_ue(frame_num_index, rbspBuffer, rbspBufferSize);
//slice_type (ue)
frame_num_index += ParserH264::bitsToSkip_ue(frame_num_index, rbspBuffer, rbspBufferSize);
//pic_parameter_set_id (ue)
frame_num_index += ParserH264::bitsToSkip_ue(frame_num_index, rbspBuffer, rbspBufferSize);
//now can read frame_num
uint32_t frame_num = ParserH264::readbits(frame_num_index,
spsinfo.log2_max_frame_num,
rbspBuffer, rbspBufferSize);

if (!spsinfo_set){
old_frame_num = frame_num;
spsinfo_set = true;//spsinfo.set
}

if (old_frame_num != frame_num){
newFrameOnNextIDR = true;
old_frame_num = frame_num;
}
}


if (newFrameOnNextIDR &&(nal_type == NAL_TYPE_CSIDRP ||
nal_type == NAL_TYPE_CSNIDRP)) {
newFrameOnNextIDR = false;
if (firstFrame){//skip the first frame
firstFrame = false;
} else {
newFrameFound = true;
currentFrame++;
}
}
}

void reset(){
newFrameFound = false;
}

};

最后是写入器类(FLVFileWriter)。

在这个实现中,我存储 NAL,直到检测到新帧。当它找到一个新帧时,我用 NAL 集(所有 NAL 都存储在列表中)写入 FLV 标签。

代码:

// Writes the NAL units found by ParserH264 into an FLV file.
// Slice NALs are collected (in AVCC form) until H264NewFrameDetection reports
// the start of the next picture; the collected picture is then written as one
// FLV video tag. Timestamps assume a fixed 30 frames per second.
class FLVFileWriter: public ParserH264 {

    std::vector<uint8_t> sps;   // last SPS NAL seen
    std::vector<uint8_t> pps;   // last PPS NAL seen

    sps_info spsInfo;           // values parsed from `sps`; spsInfo.set is false until then

    H264NewFrameDetection mH264NewFrameDetection;

    // Writes the picture held in nalBuffer (if any) as one video tag at
    // videoTimestamp_ms and empties nalBuffer.
    void writePendingFrame() {
        // An entry is a 4-byte length followed by the NAL, so the type of the
        // first NAL is at index 4; anything shorter holds no NAL.
        if (nalBuffer.size() > 4){
            uint8_t firstNALtype = nalBuffer[4] & 0x1f;

            bool iskeyframe = (firstNALtype == NAL_TYPE_CSIDRP);
            mFLVWritter.writeVideoFrame(nalBuffer, iskeyframe, videoTimestamp_ms, 0);
            mFLVWritter.flushToFD(fd);
        }
        nalBuffer.clear();
    }

    // Timestamp in milliseconds of picture number `frame` at 30 fps.
    // The product is computed in 64 bits so it does not wrap before the division.
    static uint32_t frameToTimestamp_ms(uint32_t frame) {
        return static_cast<uint32_t>((static_cast<uint64_t>(frame) * 1000) / 30);
    }

public:
    FLVWritter mFLVWritter;
    int fd;
    uint32_t videoTimestamp_ms;

    //contains the several NALs until complete a frame... after that can write to FLV
    std::vector<uint8_t> nalBuffer;

    // Creates/truncates `file` and writes the FLV file header.
    // Terminates the process when the file cannot be opened.
    FLVFileWriter ( const char* file ) {
        fd = open(file, O_WRONLY | O_CREAT | O_TRUNC , 0644 );
        if (fd < 0){
            fprintf(stderr,"error to create flv file\n");
            exit(-1);
        }

        //video only: no audio, have video
        mFLVWritter.writeFLVFileHeader(false, true);
        mFLVWritter.flushToFD(fd);

        videoTimestamp_ms = 0;

        spsInfo.profile_idc = 0;
        spsInfo.constraints = 0;
        spsInfo.level_idc = 0;
        spsInfo.log2_max_frame_num = 0;
        spsInfo.set = false;
    }

    // Writes the last pending picture and the end-of-sequence tag, then closes the file.
    virtual ~FLVFileWriter(){
        if (fd >= 0){

            if (nalBuffer.size() > 0){
                //force write the last frame
                writePendingFrame();
            } else {
                if (mH264NewFrameDetection.currentFrame > 0)
                    videoTimestamp_ms = frameToTimestamp_ms(mH264NewFrameDetection.currentFrame-1);
            }
            mFLVWritter.writeVideoEndOfStream(videoTimestamp_ms,0);
            mFLVWritter.flushToFD(fd);

            close(fd);
        }
    }


    // Handles one NAL unit delivered by ParserH264 (start code already removed).
    // SPS/PPS update the sequence header, slices are appended to the pending
    // picture, SEI is ignored; any other NAL type terminates the process.
    void chunkDetectedH264(const std::vector<uint8_t> &data) {

        if (data.size() <= 0){
            fprintf(stdout, " error On h264 chunk detection\n");
            return;
        }

        uint8_t nal_bit = data[0];
        uint8_t nal_type = (nal_bit & 0x1f);

        mH264NewFrameDetection.analyseBufferForNewFrame(data, spsInfo);

        // This NAL starts a new picture: write the one collected so far first.
        if (mH264NewFrameDetection.newFrameFound){
            mH264NewFrameDetection.reset();

            writePendingFrame();

            videoTimestamp_ms = frameToTimestamp_ms(mH264NewFrameDetection.currentFrame);
        }

        //0x67
        if ( nal_type == (NAL_TYPE_SPS) ) {
            fprintf(stdout, " processing: 0x%x (SPS)\n",nal_bit);

            // header byte + profile + constraints + level are the minimum
            if (data.size() < 4){
                fprintf(stdout, " error On h264 SPS detection (too short)\n");
                return;
            }

            // Convert the whole SPS: it is small, and asking for a fixed
            // number of bytes would read past the end of a shorter SPS.
            std::vector<uint8_t> spsRBSP;
            nal2RBSP(data, &spsRBSP);
            spsInfo = parseSPS(spsRBSP);

            sps = data;

        }
        //0x68
        else if ( nal_type == (NAL_TYPE_PPS) ) {
            fprintf(stdout, " processing: 0x%x (PPS)\n",nal_bit);

            pps = data;

            // the sequence header needs the values of a parsed SPS
            if (!spsInfo.set){
                fprintf(stdout, " PPS received before SPS, sequence header not written\n");
                return;
            }

            mFLVWritter.writeVideoSequenceHeader(sps, pps, spsInfo);
            mFLVWritter.flushToFD(fd);

        }
        //0x65, 0x61, 0x41
        else if ( nal_type == NAL_TYPE_CSIDRP ||
                  nal_type == NAL_TYPE_CSNIDRP ) {
            //convert annexb to AVCC (length before the NAL structure)
            mFLVWritter.writeUInt32( data.size(), &nalBuffer );
            nalBuffer.insert(nalBuffer.end(), data.begin(), data.end());

        } else if (nal_type == (NAL_TYPE_SEI)) {
            fprintf(stdout, " ignoring SEI bit: 0x%x type: 0x%x\n",nal_bit, nal_type);
        } else {
            // nal_bit type not implemented...
            fprintf(stdout, "Error: unknown NAL bit: 0x%x type: 0x%x\n",nal_bit, nal_type);
            exit(-1);
        }

    }
};

结论

经过测试,该视频可以在这个新实现中正常运行。

我现在唯一的问题是与 PTS(演示时间戳)和 DTS(解码器时间戳)有关。我不知道如何从流中提取它们以及如何使用它们来计算帧时间戳。

当前的实现使用硬编码比率 1000/30(例如 30fps)作为增加时间戳的基础。

时间戳以毫秒为单位。

关于c++ - 封装 H.264 的 C/C++ FLV 多路复用器未按预期工作,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/56894728/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com