c++ - 封装 H.264 的 C/C++ FLV 多路复用器未按预期工作-6ren

c++ - 封装 H.264 的 C/C++ FLV 多路复用器未按预期工作

转载作者：塔克拉玛干更新时间：2023-11-03 07:02:36

我正在尝试制作一个 FLV 复用器(muxer)。我先用从摄像头(C920)捕获的 h264 流做测试，但封装生成的 .flv 文件无法正常播放。

首先，我尝试在 h264 中查找 NAL，搜索模式 0x00 0x00 0x00 0x01...但是我在互联网上发现有两种模式可以找到 NAL...所以我实现了搜索 0x00 0x00 0x01 和其他...

在第一次测试中，我发现很少有 NAL 以四个字节开头，但是在更改代码以搜索 3 字节模式后，我发现了很多 NAL...

我在互联网上找到的参考资料显示，其代码只使用少数几种 NAL 类型来封装 FLV 文件；但在改为检测 3 字节模式后，程序发现了很多 NAL，我不知道该如何把它们写入流中。

一些代码:-)

我正在用 C++ 实现，所以我有类。

FLV 写入器(writer):

// In-memory byte buffer with helpers for emitting the big-endian fields of an
// FLV file, plus a method that writes the pending bytes to a file descriptor.
class FLVWritter{
public:
    // Bytes queued since the last flushToFD()/reset().
    std::vector<uint8_t> buffer;

    // Writes every pending byte to fd and then empties the buffer.
    // write() may accept fewer bytes than requested, so keep going until the
    // whole buffer is out; a failed write is reported instead of being ignored.
    // The buffer is emptied in every case, as callers expect a fresh buffer.
    void flushToFD(int fd) {
        size_t offset = 0;
        while (offset < buffer.size()) {
            ssize_t written = ::write(fd, &buffer[offset], buffer.size() - offset);
            if (written <= 0) {
                fprintf(stderr, "error writing flv data to file descriptor\n");
                break;
            }
            offset += (size_t)written;
        }
        buffer.clear();
    }

    // Discards the pending bytes without writing them.
    void reset(){
        buffer.clear();
    }

    // Appends the 9-byte FLV header followed by the 4-byte "previous tag size 0".
    // haveAudio sets flag bit 0x04, haveVideo sets flag bit 0x01.
    void writeFLVFileHeader(bool haveAudio, bool haveVideo){
        uint8_t flv_header[13] = {
            0x46, 0x4c, 0x56, 0x01, 0x00, // "FLV", version 1, flags (filled below)
            0x00, 0x00, 0x00, 0x09,       // header size = 9
            0x00, 0x00, 0x00, 0x00        // previous tag size = 0
        };

        flv_header[4] = (uint8_t)((haveAudio ? 0x04 : 0x00) | (haveVideo ? 0x01 : 0x00));

        buffer.insert(buffer.end(), flv_header, flv_header + 13);
    }

    // Appends one byte.
    void writeUInt8(uint8_t v) {
        buffer.push_back(v);
    }

    // Appends the low 16 bits of v, most significant byte first.
    void writeUInt16(uint32_t v){
        buffer.push_back((uint8_t)(v >> 8));
        buffer.push_back((uint8_t)(v));
    }

    // Appends the low 24 bits of v, most significant byte first.
    void writeUInt24(uint32_t v){
        buffer.push_back((uint8_t)(v >> 16));
        buffer.push_back((uint8_t)(v >> 8));
        buffer.push_back((uint8_t)(v));
    }

    // Appends v as 4 bytes, most significant byte first.
    void writeUInt32(uint32_t v){
        buffer.push_back((uint8_t)(v >> 24));
        buffer.push_back((uint8_t)(v >> 16));
        buffer.push_back((uint8_t)(v >> 8));
        buffer.push_back((uint8_t)(v));
    }

    // Appends v in the FLV timestamp layout: the low 24 bits (big-endian)
    // first, then the top 8 bits as the trailing "extended" byte.
    void writeUInt32Timestamp(uint32_t v){
        buffer.push_back((uint8_t)(v >> 16));
        buffer.push_back((uint8_t)(v >> 8));
        buffer.push_back((uint8_t)(v));
        buffer.push_back((uint8_t)(v >> 24));
    }

};

h264 解析器:

// Splits an H.264 Annex-B byte stream into NAL units.
// Bytes are fed through parseH264(); every time a start code (00 00 01 or
// 00 00 00 01) closes a unit, chunkDetectedH264() is called with the bytes
// that lie between the two start codes.
class ParserH264 {

    // Bytes of the NAL unit currently being collected.
    std::vector<uint8_t> buffer;

    enum State{
        None,
        Data,
        NAL0,
        NAL1
    };
    State nalState; // nal = network abstraction layer; progress through a start code
    State writingState; // Data once the first start code has been seen

    // Consumes one stream byte: tracks start codes and accumulates NAL bytes.
    void putByte(uint8_t byte) {
        //
        // Detect start code 00 00 01 and 00 00 00 01
        //
        // It returns the buffer right after the start code
        //
        if (byte == 0x00 && nalState == None)
            nalState = NAL0;
        else if (byte == 0x00 && (nalState == NAL0 || nalState == NAL1) )
            nalState = NAL1;
        else if (byte == 0x01 && nalState == NAL1){
            nalState = None;

            if (writingState == None){
                // first start code of the stream: start collecting from here
                writingState = Data;
                return;
            } else if (writingState == Data){

                // The two zero bytes of the start code were stored while it
                // was still undecided whether they were payload: drop them.
                buffer.pop_back();// 0x00
                buffer.pop_back();// 0x00

                // in the case using the format 00 00 00 01, remove the last element detected
                // (keep value, if emulation prevention is present).
                // The buffer may hold 0 or 1 bytes here, so never index before
                // its beginning.
                const size_t n = buffer.size();
                if (n >= 1 && buffer[n-1] == 0x00 &&
                    (n < 2 || buffer[n-2] != 0x03) )
                    buffer.pop_back();

                // Two adjacent start codes produce an empty unit; hand over a
                // null pointer rather than taking the address of element 0.
                const uint8_t* nalBytes = buffer.empty() ? 0 : &buffer[0];
                chunkDetectedH264(nalBytes,buffer.size());
                buffer.clear();
                return;
            }
        } else
            nalState = None;

        if (writingState == Data){
            //
            // increase raw buffer size
            //
            buffer.push_back(byte);
        }
    }

public:

    ParserH264() {
        nalState = None;
        writingState = None;
    }

    virtual ~ParserH264(){
    }

    // Called once per detected NAL unit; ibuffer holds size bytes without the
    // start code (size may be 0, in which case ibuffer is null).
    // The default implementation does nothing.
    virtual void chunkDetectedH264(const uint8_t* ibuffer, int size){

    }

    // Emits the bytes collected after the last start code (if any) as a final
    // NAL unit and returns the parser to its initial state.
    void endOfStreamH264() {
        if (buffer.size() <= 0)
            return;

        chunkDetectedH264(&buffer[0],buffer.size());
        buffer.clear();
        writingState = None;
        nalState = None;
    }

    // Feeds size bytes from ibuffer to the parser; may be called repeatedly
    // with consecutive pieces of the stream.
    void parseH264(const uint8_t* ibuffer, int size) {
        for(int i=0;i<size;i++){
            putByte(ibuffer[i]);
        }
    }
};

最后是主类:

// Muxes the NAL units reported by ParserH264 into an FLV file:
//  - an SPS is remembered until the next PPS arrives,
//  - a PPS triggers the AVC sequence header tag (SPS + PPS),
//  - every IDR / non-IDR slice NAL is written as its own video tag,
//  - SEI NALs are ignored, any other NAL type aborts the program.
// NOTE(review): the timestamp advances after every written tag (per NAL, not
// per picture); confirm whether that is intended for multi-slice frames.
class FLVFileWriter: public ParserH264 {

    // Most recent SPS NAL (header byte included).
    std::vector<uint8_t> lastSPS;

    // Writes one slice NAL as a complete FLV video tag, flushes it to the file
    // and advances the video timestamp.
    // frameTypeAndCodec is the first VideoTagHeader byte (0x17 or 0x27);
    // sizeMismatchMessage is printed if the assembled tag has the wrong size.
    void writeSliceTag(uint8_t frameTypeAndCodec, const uint8_t* ibuffer, int size,
                       const char* sizeMismatchMessage) {
        uint32_t bodyLength = size + 5 + 4;//flv VideoTagHeader +  NALU length

        //
        // flv tag header = 11 bytes
        //
        mFLVWritter.writeUInt8(0x09);//tagtype video
        mFLVWritter.writeUInt24( bodyLength );//data len
        mFLVWritter.writeUInt32Timestamp( videoTimestamp_ms );//timestamp
        mFLVWritter.writeUInt24( 0 );//stream id 0

        //
        // Message Body = VideoTagHeader(5) + NALLength(4) + NAL raw data
        //
        mFLVWritter.writeUInt8(frameTypeAndCodec);//high nibble 1:keyframe 2:inner frame, low nibble 7:H264
        mFLVWritter.writeUInt8(0x01);//avc NALU unit
        mFLVWritter.writeUInt24(0x00);//composition time
        mFLVWritter.writeUInt32(size);//nal length

        //nal raw data
        mFLVWritter.buffer.insert(mFLVWritter.buffer.end(), ibuffer, ibuffer + size);

        //
        // previous tag size
        //
        uint32_t currentSize = mFLVWritter.buffer.size();
        if (currentSize != bodyLength + 11 ){
            fprintf(stderr, "%s", sizeMismatchMessage);
            exit(-1);
        }
        mFLVWritter.writeUInt32(currentSize);//data len
        mFLVWritter.flushToFD(fd);

        videoTimestamp_ms += 1000/30;
    }

public:
    FLVWritter mFLVWritter;
    int fd;
    bool firstAudioWrite;
    uint32_t audioTimestamp_ms;
    uint32_t videoTimestamp_ms;

    // Creates/truncates the output file and writes the FLV file header.
    // Terminates the program if the file cannot be opened.
    FLVFileWriter ( const char* file ) {
        fd = open(file, O_WRONLY | O_CREAT | O_TRUNC , 0644 );
        if (fd < 0){
            fprintf(stderr,"error to create flv file\n");
            exit(-1);
        }

        //video only, no audio
        mFLVWritter.writeFLVFileHeader(false, true);
        mFLVWritter.flushToFD(fd);

        firstAudioWrite = true;
        audioTimestamp_ms = 0;
        videoTimestamp_ms = 0;
    }

    // Flushes whatever is still pending and closes the file.
    virtual ~FLVFileWriter(){
        if (fd >= 0){
            mFLVWritter.flushToFD(fd);
            close(fd);
        }
    }

    // Receives one NAL unit (without start code) and writes the matching FLV
    // tag, dispatching on the NAL type stored in the low 5 bits of byte 0.
    virtual void chunkDetectedH264(const uint8_t* ibuffer, int size) {
        printf("[debug] Detected NAL chunk size: %i\n",size);

        if (size <= 0){
            fprintf(stdout, "  error On h264 chunk detection\n");
            return;
        }

        uint8_t nal_bit = ibuffer[0];
        uint8_t nal_type = (nal_bit & 0x1f);

        //0x67
        if ( nal_type == (NAL_TYPE_SPS) ) {

            fprintf(stdout, " processing: 0x%x (SPS)\n",nal_bit);

            //store information to use when the PPS arrives, probably the next NAL detection
            lastSPS.assign(ibuffer, ibuffer + size);
        }
        else if ( nal_type == (NAL_TYPE_PPS) ) {

            fprintf(stdout, " processing: 0x%x (PPS)\n",nal_bit);

            // The sequence header copies bytes 1..3 of the SPS, so a PPS that
            // shows up before any usable SPS cannot be written.
            if (lastSPS.size() < 4){
                fprintf(stderr, "PPS received before a valid SPS. Skipping it.\n");
                return;
            }

            // Both parameter sets are stored behind a 16-bit length field.
            if ( lastSPS.size() > 0xffff ){
                fprintf(stderr, "SPS Greater than 64k. This muxer does not support it.\n");
                exit(-1);
            }
            if ( size > 0xffff ){
                fprintf(stderr, "PPS Greater than 64k. This muxer does not support it.\n");
                exit(-1);
            }

            uint32_t bodyLength = lastSPS.size() + size + 16;

            //
            // flv tag header = 11 bytes
            //
            mFLVWritter.writeUInt8(0x09);//tagtype video
            mFLVWritter.writeUInt24( bodyLength );//data len
            mFLVWritter.writeUInt32Timestamp( videoTimestamp_ms );//timestamp
            mFLVWritter.writeUInt24( 0 );//stream id 0

            //
            // Message Body = 16 bytes + SPS bytes + PPS bytes
            //
            //flv VideoTagHeader
            mFLVWritter.writeUInt8(0x17);//key frame, AVC 1:keyframe 7:h264
            mFLVWritter.writeUInt8(0x00);//avc sequence header
            mFLVWritter.writeUInt24( 0x00 );//composition time

            //flv VideoTagBody --AVCDecoderConfigurationRecord
            mFLVWritter.writeUInt8(0x01);//configurationversion
            mFLVWritter.writeUInt8(lastSPS[1]);//avcprofileindication
            mFLVWritter.writeUInt8(lastSPS[2]);//profilecompatibilty
            mFLVWritter.writeUInt8(lastSPS[3]);//avclevelindication
            mFLVWritter.writeUInt8(0xFC | 0x03); //reserved + lengthsizeminusone
            mFLVWritter.writeUInt8(0xe0 | 0x01); // first reserved, second number of SPS

            mFLVWritter.writeUInt16( lastSPS.size() ); //sequence parameter set length
            //H264 sequence parameter set raw data
            mFLVWritter.buffer.insert(mFLVWritter.buffer.end(), lastSPS.begin(), lastSPS.end());

            mFLVWritter.writeUInt8(0x01); // number of PPS

            mFLVWritter.writeUInt16(size); //picture parameter set length
            //H264 picture parameter set raw data
            mFLVWritter.buffer.insert(mFLVWritter.buffer.end(), ibuffer, ibuffer + size);

            //
            // previous tag size
            //
            uint32_t currentSize = mFLVWritter.buffer.size();
            if (currentSize != bodyLength + 11 ){
                fprintf(stderr, "error to write flv video tag NAL_TYPE_PPS\n");
                exit(-1);
            }
            mFLVWritter.writeUInt32(currentSize);//data len
            mFLVWritter.flushToFD(fd);


            videoTimestamp_ms += 1000/30;
        }
        //0x65
        else if ( nal_type == (NAL_TYPE_CSIDRP) ) {

            fprintf(stdout, " processing: 0x%x (0x65)\n",nal_bit);

            writeSliceTag(0x17, ibuffer, size,
                          "error to write flv video tag NAL_TYPE_CSIDRP\n");
        }
        //0x61
        else if ( nal_type == (NAL_TYPE_CSNIDRP) ) {

            fprintf(stdout, " processing: 0x%x (0x61)\n",nal_bit);

            writeSliceTag(0x27, ibuffer, size,
                          "error to write flv video tag NAL_TYPE_CSNIDRP\n");
        }

        else if (nal_type == (NAL_TYPE_SEI)) {
            fprintf(stdout, " ignoring SEI bit: 0x%x type: 0x%x\n",nal_bit, nal_type);

        } else {
            // nal_bit type not implemented...
            fprintf(stdout, "Error: unknown NAL bit: 0x%x type: 0x%x\n",nal_bit, nal_type);
            exit(-1);
        }
    }

};

要在 main 中使用这些类，我们可以这样写:

// Set to non-zero by signal_handler() and polled by the read loop in main().
// sig_atomic_t is the integer type that may be written from a signal handler.
volatile sig_atomic_t exit_requested = 0;

// Signal handler: only records that termination was requested.
// The signal number is not needed, so the parameter is left unnamed.
void signal_handler(int) {
    exit_requested = 1;
}

// Reads a raw H.264 stream from stdin and muxes it into "out.flv" until end of
// input, a read error, or one of SIGINT/SIGTERM/SIGQUIT.
int main(int argc, char* argv[]) {
    (void)argc;
    (void)argv;

    int fd_stdin = fileno(stdin);

    signal(SIGINT,  signal_handler);
    signal(SIGTERM, signal_handler);
    signal(SIGQUIT, signal_handler);

    unsigned char buffer[65536];

    FLVFileWriter flv("out.flv");

    while (!exit_requested) {

        ssize_t readedSize = read(fd_stdin,buffer,sizeof(buffer));
        if (readedSize==0)
            break; // end of input

        if (readedSize < 0) {
            // A read interrupted by one of the handled signals also lands
            // here; in that case exit_requested is set and nothing is wrong.
            // Anything else is a real error: stop instead of retrying forever.
            if (!exit_requested)
                fprintf(stderr,"error reading h264 stream from stdin\n");
            break;
        }

        flv.parseH264(buffer,(int)readedSize);
    }

    // emit the NAL that was still being collected
    flv.endOfStreamH264();

    signal(SIGINT,  SIG_DFL);
    signal(SIGTERM, SIG_DFL);
    signal(SIGQUIT, SIG_DFL);

    return 0;
}

编译上面的程序我们可以执行如下:

cat test.h264 | ./flv

然后我得到了如下结果:

我从命令行使用 ffmpeg 来检查我的源 h264 流文件是否已损坏。所以我运行了以下命令:

cat test.h264 | ffmpeg -i - -c:v copy out2.flv

使用 ffmpeg 结果是可以的:

我把完整源码连同测试用的视频放在了这里(here)。

这是我用于测试的 h264 文件。

最佳答案

WIP(进行中)

但我发现了很多新信息:-)。

第一:不能把 h264 流中的 NAL 缓冲区逐个单独打包进 FLV 容器。

其次:只有当有新帧可用时，才需要增加时间戳。

我遇到的绿屏问题与 NAL 包的分片以及时间戳问题有关。

困难的部分是检测流中何时出现新的一帧。这里(here)提到了几种方法。

但是 h264 流在其结构中具有不同位大小的数据成员。

阅读位

ue(v) 数据类型。要读取这种结构，需要先统计前导 0 位的个数，直到遇到第一个 1 位；然后在该 1 位之后再读取与前导 0 个数相同数量的位。

h264 header 中这类字段的长度是可变的……它采用指数哥伦布(Exp-Golomb)编码，其值按以下公式计算:(1 << leadingZeros) - 1 + readbits(leadingZeros)。

RBSP - 原始字节序列负载

我们不能直接读取流值，因为它可能包含仿真阻止字节。我们需要在读取 header 之前删除仿真防止字节。

代码

因此，为了缓解这个问题，我首先重写了 H264 解析器，如下所示:

// Values extracted from an H.264 sequence parameter set (SPS) by
// ParserH264::parseSPS().
struct sps_info{
    uint8_t profile_idc;        // SPS byte 1 (the byte after the NAL header)
    uint8_t constraints;        // SPS byte 2
    uint8_t level_idc;          // SPS byte 3
    uint8_t log2_max_frame_num; // log2_max_frame_num_minus4 + 4 = width in bits of frame_num

    bool set;                   // parseSPS() sets this to true
};

// Splits an H.264 Annex-B byte stream into NAL units (start codes 00 00 01 and
// 00 00 00 01) and offers static helpers to read bit fields, Exp-Golomb values
// and the SPS, and to strip emulation prevention bytes.
class ParserH264 {
    // Bytes of the NAL unit currently being collected.
    std::vector<uint8_t> buffer;

    enum State{
        None,
        Data,
        NAL0,
        NAL1,

        EMULATION0,
        EMULATION1,
        EMULATION_FORCE_SKIP
    };
    State nalState; // nal = network abstraction layer; progress through a start code
    State writingState; // Data once the first start code has been seen

    // Consumes one stream byte: tracks start codes and accumulates NAL bytes.
    void putByte(uint8_t byte) {
        //
        // Detect start code 00 00 01 and 00 00 00 01
        //
        // It returns the buffer right after the start code
        //
        if (byte == 0x00 && nalState == None)
            nalState = NAL0;
        else if (byte == 0x00 && (nalState == NAL0 || nalState == NAL1) )
            nalState = NAL1;
        else if (byte == 0x01 && nalState == NAL1){
            nalState = None;

            if (writingState == None){
                // first start code of the stream: start collecting from here
                writingState = Data;
                return;
            } else if (writingState == Data){

                // The two zero bytes of the start code were stored while it
                // was still undecided whether they were payload: drop them.
                buffer.pop_back();// 0x00
                buffer.pop_back();// 0x00

                //in the case using the format 00 00 00 01, remove the last element detected
                //(keep value, if emulation prevention is present).
                // size() is unsigned, so the bounds must be tested explicitly:
                // the buffer may hold only 0 or 1 bytes at this point.
                const size_t n = buffer.size();
                if (n >= 1 && buffer[n-1] == 0x00 &&
                    (n < 2 || buffer[n-2] != 0x03) )
                    buffer.pop_back();

                chunkDetectedH264(buffer);

                buffer.clear();

                return;
            }
        } else
            nalState = None;

        if (writingState == Data){
            //
            // increase raw buffer size
            //
            buffer.push_back(byte);
        }
    }

    // Counts consecutive 0 bits starting at bit index start. Stops at the
    // first 1 bit, at the end of the data, or after 33 zeros (already more
    // than any caller accepts), so it never reads outside data[0..size).
    static inline int countLeadingZeroBits(int start, const uint8_t* data, int size){
        int zeros = 0;
        while (zeros < 33 &&
               (start + zeros) / 8 < size &&
               readbit(start + zeros, data, size) == 0)
            zeros++;
        return zeros;
    }

    // Advances *bitPos past one ue(v) field without decoding it.
    static inline void skip_ue(int *bitPos, const uint8_t* data, int size){
        *bitPos += bitsToSkip_ue(*bitPos, data, size);
    }

    // Decodes the ue(v) field at *bitPos and advances *bitPos past it.
    static inline uint32_t consume_ue(int *bitPos, const uint8_t* data, int size){
        uint32_t value = read_golomb_ue(*bitPos, data, size);
        *bitPos += bitsToSkip_ue(*bitPos, data, size);
        return value;
    }

    // Advances *bitPos past one SPS scaling_list() of the given entry count.
    // Each entry carries a se(v) delta only while nextScale is non-zero.
    static inline void skipScalingList(int *bitPos, int entries, const uint8_t* data, int size){
        int lastScale = 8;
        int nextScale = 8;
        for(int j=0;j<entries;j++){
            if (nextScale != 0){
                uint32_t codeNum = consume_ue(bitPos, data, size);
                // se(v) mapping: odd codeNum -> positive, even codeNum -> negative
                int delta = (codeNum & 1) ? (int)((codeNum + 1) / 2) : -(int)(codeNum / 2);
                nextScale = (lastScale + delta + 256) % 256;
            }
            if (nextScale != 0)
                lastScale = nextScale;
        }
    }

public:

    ParserH264() {
        nalState = None;
        writingState = None;
    }

    virtual ~ParserH264(){
    }

    // Called once per detected NAL unit (start code removed, may be empty).
    // The bytes may still contain emulation prevention bytes; use nal2RBSP()
    // to obtain the Raw Byte Sequence Payload (RBSP).
    // The default implementation does nothing.
    virtual void chunkDetectedH264(const std::vector<uint8_t> &nal){

    }

    // Emits the bytes collected after the last start code (if any) as a final
    // NAL unit and returns the parser to its initial state.
    void endOfStreamH264() {
        if (buffer.size() <= 0)
            return;

        chunkDetectedH264(buffer);

        buffer.clear();
        writingState = None;
        nalState = None;
    }

    // Feeds size bytes from ibuffer to the parser; may be called repeatedly
    // with consecutive pieces of the stream.
    void parseH264(const uint8_t* ibuffer, int size) {
        for(int i=0;i<size;i++){
            putByte(ibuffer[i]);
        }
    }


    // Returns the bit at index bitPos (bit 0 = most significant bit of
    // data[0]). Terminates the program if the bit lies outside the data.
    static inline uint32_t readbit(int bitPos,  const uint8_t* data, int size){
        int dataPosition = bitPos / 8;
        int bitPosition = 7 - bitPos % 8;
        if (dataPosition >= size){
            fprintf(stderr,"error to access bit...\n");
            exit(-1);
        }
        return (data[dataPosition] >> bitPosition) & 0x01;
    }

    // leading 0`s count equals the number of next bits after bit 1
    //
    //  Example: 01x  001xx 0001xxx 00001xxxx
    //
    //  The max number of bits is equal 32 in this sintax
    //
    // Returns the total length in bits of the ue(v) field starting at bit
    // index start; terminates the program if it would exceed 32 bits.
    static inline int bitsToSkip_ue( int start,  const uint8_t* data, int size){
        int leadingZeros = countLeadingZeroBits(start, data, size);
        int totalBits = leadingZeros + 1 + leadingZeros;
        if (totalBits > 32){
            fprintf(stderr,"bitsToSkip_ue length greated than 32\n");
            exit(-1);
        }
        return totalBits;
    }

    // Decodes the unsigned Exp-Golomb value ue(v) that starts at bit index
    // start: (1 << leadingZeros) - 1 + readbits(leadingZeros).
    // Terminates the program on a malformed or truncated field.
    static inline uint32_t read_golomb_ue(int start,  const uint8_t* data, int size){
        int leadingZeros = countLeadingZeroBits(start, data, size);
        if (leadingZeros > 31){
            // the result would not fit in 32 bits
            fprintf(stderr,"read_golomb_ue length greated than 32\n");
            exit(-1);
        }
        int markerPos = start + leadingZeros;
        // Result unused on purpose: readbit() aborts when the terminating
        // 1 bit would lie beyond the end of the data.
        readbit(markerPos, data, size);

        uint32_t num = readbits(markerPos+1, leadingZeros, data, size);
        num += ((uint32_t)1 << leadingZeros) - 1;

        return num;
    }

    // Reads length bits (at most 32) starting at bit index bitPos, most
    // significant bit first.
    static inline uint32_t readbits(int bitPos, int length, const uint8_t* data, int size){
        if (length > 32){
            fprintf(stderr,"readbits length greated than 32\n");
            exit(-1);
        }
        uint32_t result = 0;
        for(int i=0;i<length;i++){
            result <<= 1;
            result = result | readbit(bitPos+i, data, size);
        }
        return result;
    }

    // Parses the beginning of an SPS. sps_rbsp must start with the NAL header
    // byte and be free of emulation prevention bytes (see nal2RBSP()).
    // Returns profile/constraints/level and log2_max_frame_num with set == true.
    // Terminates the program on a truncated or out-of-range SPS.
    static inline sps_info parseSPS(const std::vector<uint8_t> &sps_rbsp) {
        if (sps_rbsp.size() < 4){
            fprintf(stderr,"parseSPS SPS too short\n");
            exit(-1);
        }

        const uint8_t *data = &sps_rbsp[0];
        int size = (int)sps_rbsp.size();

        sps_info result;

        result.profile_idc = sps_rbsp[1];
        result.constraints = sps_rbsp[2];
        result.level_idc = sps_rbsp[3];

        int bitPos = 8+24;//NAL bit + profile_idc (8bits) + constraints (8bits) + level_idc (8bits)
        skip_ue(&bitPos, data, size);//seq_parameter_set_id (ue)

        // High-family profiles carry chroma / bit depth / scaling information
        // before log2_max_frame_num_minus4; it has to be stepped over.
        switch (result.profile_idc) {
            case 100: case 110: case 122: case 244: case 44:
            case 83:  case 86:  case 118: case 128:
            case 138: case 139: case 134: case 135: {
                uint32_t chroma_format_idc = consume_ue(&bitPos, data, size);
                if (chroma_format_idc == 3)
                    bitPos += 1;//separate_colour_plane_flag
                skip_ue(&bitPos, data, size);//bit_depth_luma_minus8 (ue)
                skip_ue(&bitPos, data, size);//bit_depth_chroma_minus8 (ue)
                bitPos += 1;//qpprime_y_zero_transform_bypass_flag

                uint32_t scalingMatrixPresent = readbit(bitPos, data, size);
                bitPos += 1;
                if (scalingMatrixPresent){
                    int listCount = (chroma_format_idc != 3) ? 8 : 12;
                    for(int i=0;i<listCount;i++){
                        uint32_t listPresent = readbit(bitPos, data, size);
                        bitPos += 1;
                        if (listPresent)
                            skipScalingList(&bitPos, (i < 6) ? 16 : 64, data, size);
                    }
                }
                break;
            }
            default:
                break;
        }

        uint32_t log2_max_frame_num_minus4 = read_golomb_ue(bitPos, data, size);

        if (log2_max_frame_num_minus4 > 12){
            fprintf(stderr,"parseSPS_log2_max_frame_num_minus4 value not in range [0-12] \n");
            exit(-1);
        }

        result.log2_max_frame_num = log2_max_frame_num_minus4 + 4;
        result.set = true;
        return result;
    }

    // Raw Byte Sequence Payload (RBSP) -- without the emulation prevention bytes
    // maxSize is used to parse just the beggining of the nal structure...
    // avoid process all buffer size on NAL new frame check
    //
    // Copies at most maxSize bytes of buffer (all of them when maxSize <= 0 or
    // larger than the buffer) into *rbsp, dropping every 0x03 that follows two
    // or more zero bytes. Terminates the program if the byte after a dropped
    // 0x03 is greater than 0x03.
    static inline void nal2RBSP(const std::vector<uint8_t> &buffer,
                                std::vector<uint8_t> *rbsp,
                                int maxSize = -1){
        if (maxSize <= 0 || (size_t)maxSize > buffer.size())
            maxSize = (int)buffer.size();
        rbsp->resize(maxSize);

        int zeroRun = 0;                 // consecutive 0x00 bytes just seen
        bool afterEmulationByte = false; // previous byte was a dropped 0x03
        int count = 0;

        for(int i=0; i < maxSize ;i++){
            uint8_t byte = buffer[i];

            if (afterEmulationByte) { //must be 00 01 02 or 03
                afterEmulationByte = false;
                if ( byte > 0x03 ){
                    fprintf(stdout, "H264 NAL emulation prevention pattern detected error (%u)\n", byte);
                    exit(-1);
                }
            } else if (zeroRun >= 2 && byte == 0x03) {
                // emulation prevention byte: not part of the payload
                zeroRun = 0;
                afterEmulationByte = true;
                continue;
            }

            // A kept 0x00 always counts towards the next 00 00 03 pattern,
            // including the one right after a dropped 0x03.
            zeroRun = (byte == 0x00) ? zeroRun + 1 : 0;

            (*rbsp)[count] = byte;
            count++;
        }
        rbsp->resize(count);
    }

};

我更新了 FLVWriter 以写入整个帧(具有 1 个或多个 NAL)并写入视频序列 header 。

代码:

// In-memory byte buffer with helpers for emitting FLV structures (file header,
// AVC sequence header tag, video frame tag, end-of-stream tag), plus a method
// that writes the pending bytes to a file descriptor.
class FLVWritter{
public:
    // Bytes queued since the last flushToFD()/reset().
    std::vector<uint8_t> buffer;

    // Writes every pending byte to fd and then empties the buffer.
    // write() may accept fewer bytes than requested, so keep going until the
    // whole buffer is out; a failed write is reported instead of being ignored.
    // The buffer is emptied in every case, as callers expect a fresh buffer.
    void flushToFD(int fd) {
        size_t offset = 0;
        while (offset < buffer.size()) {
            ssize_t written = ::write(fd, &buffer[offset], buffer.size() - offset);
            if (written <= 0) {
                fprintf(stderr, "error writing flv data to file descriptor\n");
                break;
            }
            offset += (size_t)written;
        }
        buffer.clear();
    }

    // Discards the pending bytes without writing them.
    void reset(){
        buffer.clear();
    }

    // Appends the 9-byte FLV header followed by the 4-byte "previous tag size 0".
    // haveAudio sets flag bit 0x04, haveVideo sets flag bit 0x01.
    void writeFLVFileHeader(bool haveAudio, bool haveVideo){
        uint8_t flv_header[13] = {
            0x46, 0x4c, 0x56, 0x01, 0x00, // "FLV", version 1, flags (filled below)
            0x00, 0x00, 0x00, 0x09,       // header size = 9
            0x00, 0x00, 0x00, 0x00        // previous tag size = 0
        };

        flv_header[4] = (uint8_t)((haveAudio ? 0x04 : 0x00) | (haveVideo ? 0x01 : 0x00));

        buffer.insert(buffer.end(), flv_header, flv_header + 13);
    }

    // Appends one byte.
    void writeUInt8(uint8_t v) {
        buffer.push_back(v);
    }

    // Appends the low 16 bits of v, most significant byte first.
    void writeUInt16(uint32_t v){
        buffer.push_back((v >> 8) & 0xff);
        buffer.push_back((v) & 0xff);
    }

    // Appends the low 24 bits of v, most significant byte first.
    void writeUInt24(uint32_t v){
        buffer.push_back((v >> 16) & 0xff);
        buffer.push_back((v >> 8) & 0xff);
        buffer.push_back((v) & 0xff);
    }

    // Appends v as 4 bytes, most significant byte first.
    void writeUInt32(uint32_t v){
        writeUInt32(v, &buffer);
    }

    // Appends v as 4 big-endian bytes to *data instead of the internal buffer.
    void writeUInt32(uint32_t v, std::vector<uint8_t> *data){
        data->push_back((v >> 24) & 0xff);
        data->push_back((v >> 16) & 0xff);
        data->push_back((v >> 8) & 0xff);
        data->push_back((v) & 0xff);
    }

    // Appends v in the FLV timestamp layout: the low 24 bits (big-endian)
    // first, then the top 8 bits as the trailing "extended" byte.
    void writeUInt32Timestamp(uint32_t v){
        buffer.push_back((v >> 16) & 0xff);
        buffer.push_back((v >> 8) & 0xff);
        buffer.push_back((v) & 0xff);
        buffer.push_back((v >> 24) & 0xff);
    }

    // Appends the AVC sequence header tag (timestamp 0) built from one SPS and
    // one PPS, followed by its "previous tag size" field.
    // Terminates the program if a parameter set does not fit its 16-bit length
    // field or if the assembled tag has an unexpected size.
    void writeVideoSequenceHeader(const std::vector<uint8_t> &sps, const std::vector<uint8_t> &pps, const sps_info &spsinfo) {

        if (sps.size() > 0xffff || pps.size() > 0xffff){
            fprintf(stderr, "error writeVideoSequenceHeader\n");
            exit(-1);
        }

        // Sizes are measured from here so the tag is also correct when earlier
        // bytes are still waiting in the buffer.
        const size_t tagStart = buffer.size();
        const uint32_t bodyLength = sps.size() + pps.size() + 16;

        //
        // flv tag header = 11 bytes
        //
        writeUInt8(0x09);//tagtype video
        writeUInt24( bodyLength );//data len
        writeUInt32Timestamp( 0 );//timestamp
        writeUInt24( 0 );//stream id 0

        //
        // Message Body = 16 bytes + SPS bytes + PPS bytes
        //
        //flv VideoTagHeader
        writeUInt8(0x17);//key frame, AVC 1:keyframe 7:h264
        writeUInt8(0x00);//avc sequence header
        writeUInt24( 0x00 );//composition time

        //flv VideoTagBody --AVCDecoderConfigurationRecord
        writeUInt8(0x01);//configurationversion
        writeUInt8(spsinfo.profile_idc);//avcprofileindication
        writeUInt8(spsinfo.constraints);//profilecompatibilty
        writeUInt8(spsinfo.level_idc);//avclevelindication
        writeUInt8(0xFC | 0x03); //reserved (6 bits), NALU length size - 1 (2 bits)
        writeUInt8(0xe0 | 0x01); // first reserved, second number of SPS

        writeUInt16( sps.size() ); //sequence parameter set length
        //H264 sequence parameter set raw data
        buffer.insert(buffer.end(), sps.begin(), sps.end());

        writeUInt8(0x01); // number of PPS

        writeUInt16(pps.size()); //picture parameter set length
        //H264 picture parameter set raw data
        buffer.insert(buffer.end(), pps.begin(), pps.end());

        const uint32_t tagSize = buffer.size() - tagStart;
        if (tagSize != bodyLength + 11 ){
            fprintf(stderr, "error writeVideoSequenceHeader\n");
            exit(-1);
        }

        // previous tag size
        writeUInt32(tagSize);
    }

    // Appends one video tag whose payload is data, copied verbatim after the
    // 5-byte VideoTagHeader, followed by its "previous tag size" field.
    // keyframe selects frame type 1 (0x17) or 2 (0x27).
    // Terminates the program if the payload does not fit the 24-bit data size
    // or if the assembled tag has an unexpected size.
    void writeVideoFrame(const std::vector<uint8_t> &data, bool keyframe, uint32_t timestamp_ms, int streamID){

        if (data.size() > 0xffffff - 5){
            fprintf(stderr, "error writeVideoFrame\n");
            exit(-1);
        }

        const size_t tagStart = buffer.size();
        const uint32_t bodyLength = data.size() + 5;

        writeUInt8(0x09);//tagtype video
        writeUInt24( bodyLength );//data len
        writeUInt32Timestamp( timestamp_ms );//timestamp
        writeUInt24( streamID );//stream id

        //frame type (1:keyframe 2:inner frame) + codec (7:H264)
        writeUInt8(keyframe ? 0x17 : 0x27);

        writeUInt8(0x01);//avc NALU unit
        writeUInt24(0x00);//composition time

        buffer.insert(buffer.end(), data.begin(), data.end());

        const uint32_t tagSize = buffer.size() - tagStart;
        if (tagSize != bodyLength + 11 ){
            fprintf(stderr, "error writeVideoFrame\n");
            exit(-1);
        }

        // previous size
        writeUInt32(tagSize);

    }

    // Appends the AVC end-of-sequence video tag (packet type 2, no payload)
    // followed by its "previous tag size" field.
    void writeVideoEndOfStream(uint32_t timestamp_ms, int streamID){
        const size_t tagStart = buffer.size();

        writeUInt8(0x09);//tagtype video
        writeUInt24( 5 );//data len
        writeUInt32Timestamp( timestamp_ms );//timestamp
        writeUInt24( streamID );//stream id

        writeUInt8(0x17);//key frame, AVC 1:keyframe 2:inner frame 7:H264
        writeUInt8(0x02);//avc EOS
        writeUInt24(0x00);//composition time

        const uint32_t tagSize = buffer.size() - tagStart;
        if (tagSize != 5 + 11 ){
            fprintf(stderr, "error writeVideoEOS\n");
            exit(-1);
        }

        // previous size
        writeUInt32(tagSize);
    }

};

接下来我们需要从流中检测新帧。

我已经实现了几个方法和 frame_num 方法。

要读取 frame_num，我们需要在读取 frame_num 位之前跳过 3 个 ue(v) 结构。

现在是 h264 新的帧检测器类:

// Decides, NAL by NAL, whether a slice NAL begins a new picture.
// A new picture is assumed when a slice follows an SPS/PPS/AUD/SEI (or a NAL
// of type 14..18), or when its frame_num differs from the previous slice's.
// The result is published in newFrameFound / currentFrame; the caller clears
// newFrameFound again with reset().
class H264NewFrameDetection {
    // Scratch buffer holding the emulation-free beginning of the slice.
    std::vector<uint8_t> aux;

    bool newFrameOnNextIDR; // the next slice NAL starts a new picture
    uint32_t old_frame_num; // frame_num of the previously analysed slice
    bool spsinfo_set;       // old_frame_num has been seeded from a slice

    bool firstFrame;        // the very first picture is not counted
public:
    uint32_t currentFrame;  // number of new pictures detected so far
    bool newFrameFound;     // set when a new picture starts; cleared by reset()

    H264NewFrameDetection()
        : newFrameOnNextIDR(false),
          old_frame_num(0),
          spsinfo_set(false),
          firstFrame(true),
          currentFrame(0),
          newFrameFound(false) // was left uninitialised before the first reset()
    {
    }

    // Inspects one NAL unit (start code removed, emulation bytes still in
    // place) and updates newFrameFound / currentFrame.
    // frame_num is only read once spsinfo.set is true.
    void analyseBufferForNewFrame(const std::vector<uint8_t> &nal, const sps_info &spsinfo){

        // an empty NAL has no header byte to classify
        if (nal.empty())
            return;

        uint8_t nal_bit = nal[0];
        uint8_t nal_type = (nal_bit & 0x1f);

        const bool isSlice = (nal_type == NAL_TYPE_CSIDRP ||
                              nal_type == NAL_TYPE_CSNIDRP);

        if ( nal_type == (NAL_TYPE_SPS) ||
            nal_type == (NAL_TYPE_PPS) ||
            nal_type == (NAL_TYPE_AUD) ||
            nal_type == (NAL_TYPE_SEI) ||
            (nal_type >= 14 && nal_type <= 18)
            ) {
            newFrameOnNextIDR = true;
        }

        if (isSlice && spsinfo.set ){

            aux.clear();
            //(8 + 3*(32*2+1) + 16) = max header per NALU slice bits = 27.375 bytes
            // 32 bytes + 8 (Possibility of Emulation in 32 bytes)
            int RBSPMaxBytes = 32 + 8;
            if (nal.size() < (32 + 8))
                RBSPMaxBytes = nal.size();
            ParserH264::nal2RBSP(nal, &aux, RBSPMaxBytes );
            uint8_t * rbspBuffer = &aux[0];
            uint32_t rbspBufferSize = aux.size();

            uint32_t frame_num_index = 8;//start counting after the nal_bit
            //first_mb_in_slice (ue)
            frame_num_index += ParserH264::bitsToSkip_ue(frame_num_index, rbspBuffer, rbspBufferSize);
            //slice_type (ue)
            frame_num_index += ParserH264::bitsToSkip_ue(frame_num_index, rbspBuffer, rbspBufferSize);
            //pic_parameter_set_id (ue)
            frame_num_index += ParserH264::bitsToSkip_ue(frame_num_index, rbspBuffer, rbspBufferSize);
            //now can read frame_num
            uint32_t frame_num = ParserH264::readbits(frame_num_index,
                                                      spsinfo.log2_max_frame_num,
                                                      rbspBuffer, rbspBufferSize);

            if (!spsinfo_set){
                // first slice seen: nothing to compare against yet
                old_frame_num = frame_num;
                spsinfo_set = true;//spsinfo.set
            }

            if (old_frame_num != frame_num){
                newFrameOnNextIDR = true;
                old_frame_num = frame_num;
            }
        }


        if (newFrameOnNextIDR && isSlice) {
            newFrameOnNextIDR = false;
            if (firstFrame){//skip the first frame
                firstFrame = false;
            } else {
                newFrameFound = true;
                currentFrame++;
            }
        }
    }

    // Clears the "new frame" notification after the caller has handled it.
    void reset(){
        newFrameFound = false;
    }

};

最后是写入器(writer)类。

在这个实现中，我存储 NAL，直到检测到新帧。当它找到一个新帧时，我用 NAL 集(所有 NAL 都存储在列表中)写入 FLV 标签。

代码:

// Muxes an H.264 elementary stream, delivered one NAL unit at a time through
// chunkDetectedH264(), into an FLV file.
//
// Slice NAL units are accumulated in nalBuffer, each one preceded by a 4-byte
// length (AVCC layout instead of Annex B start codes). When
// H264NewFrameDetection reports a new frame, the accumulated NALs are written
// as a single FLV video tag. Timestamps are derived from the frame counter
// with a hard-coded rate of 30 frames per second.
class FLVFileWriter: public ParserH264 {

    std::vector<uint8_t> sps;
    std::vector<uint8_t> pps;

    sps_info spsInfo;

    H264NewFrameDetection mH264NewFrameDetection;

    // This object owns fd and closes it in the destructor; a copy would close
    // the same descriptor twice. Copying is therefore disabled (declared
    // private and intentionally left undefined).
    FLVFileWriter(const FLVFileWriter&);
    FLVFileWriter& operator=(const FLVFileWriter&);

    // Writes the NALs accumulated in nalBuffer as one FLV video tag stamped
    // with videoTimestamp_ms, then empties the buffer. Does nothing when the
    // buffer is empty, so no tag is emitted without payload.
    void flushBufferedFrame() {
        if (nalBuffer.empty())
            return;

        // Bytes 0..3 hold the length prefix of the first NAL, so byte 4 is
        // its header byte; only read it when it is actually present.
        const bool iskeyframe = nalBuffer.size() > 4 &&
                                (nalBuffer[4] & 0x1f) == NAL_TYPE_CSIDRP;

        mFLVWritter.writeVideoFrame(nalBuffer, iskeyframe, videoTimestamp_ms, 0);
        mFLVWritter.flushToFD(fd);

        nalBuffer.clear();
    }

public:
    FLVWritter mFLVWritter;
    int fd;
    uint32_t videoTimestamp_ms;

    //contains the several NALs until complete a frame... after that can write to FLV
    std::vector<uint8_t> nalBuffer;

    // Creates (or truncates) the output file and writes the FLV file header.
    // Terminates the process with exit(-1) if the file cannot be opened.
    FLVFileWriter ( const char* file ) {
        fd = open(file, O_WRONLY | O_CREAT | O_TRUNC , 0644 );
        if (fd < 0){
            fprintf(stderr,"error to create flv file\n");
            exit(-1);
        }

        //video only: haveAudio = false, haveVideo = true
        mFLVWritter.writeFLVFileHeader(false, true);
        mFLVWritter.flushToFD(fd);

        videoTimestamp_ms = 0;
        spsInfo.set = false;
    }

    // Flushes any frame still buffered, writes the end-of-stream tag and
    // closes the file.
    virtual ~FLVFileWriter(){
        if (fd < 0)
            return;

        if (!nalBuffer.empty()){
            //force write the last frame
            flushBufferedFrame();
        } else if (mH264NewFrameDetection.currentFrame > 0) {
            // Nothing pending: stamp the end-of-stream tag with the time of
            // the frame before the current counter value.
            videoTimestamp_ms = ((mH264NewFrameDetection.currentFrame-1) * 1000)/30;
        }

        mFLVWritter.writeVideoEndOfStream(videoTimestamp_ms,0);
        mFLVWritter.flushToFD(fd);
        close(fd);
    }


    // Consumes one NAL unit (data[0] is the NAL header byte, no start code).
    //
    // - SPS: parsed into spsInfo and stored for the sequence header.
    // - PPS: stored, then the video sequence header is written.
    // - IDR / non-IDR slice: appended to nalBuffer with a length prefix.
    // - SEI: ignored.
    // - anything else: reported and the process exits.
    //
    // NOTE(review): DTS/PTS are not extracted from the stream here; the
    // timestamp is computed from the frame counter and the last argument of
    // writeVideoFrame is always 0 -- presumably the composition time offset,
    // confirm against FLVWritter.
    void chunkDetectedH264(const std::vector<uint8_t> &data) {

        if (data.empty()){
            fprintf(stdout, "  error On h264 chunk detection\n");
            return;
        }

        const uint8_t nal_bit = data[0];
        const uint8_t nal_type = (nal_bit & 0x1f);

        mH264NewFrameDetection.analyseBufferForNewFrame(data, spsInfo);

        if (mH264NewFrameDetection.newFrameFound){
            mH264NewFrameDetection.reset();

            // The incoming NAL starts a new frame: emit what was collected
            // for the previous one before buffering anything new.
            flushBufferedFrame();

            videoTimestamp_ms = (mH264NewFrameDetection.currentFrame * 1000)/30;
        }

        //0x67
        if ( nal_type == (NAL_TYPE_SPS) ) {
            fprintf(stdout, " processing: 0x%x (SPS)\n",nal_bit);

            // sps is first used as scratch space for the RBSP form needed by
            // parseSPS, then overwritten with the raw NAL bytes.
            sps.clear();
            //max 26 bytes on sps header (read until log2_max_frame_num_minus4)
            nal2RBSP(data, &sps, 26 );
            spsInfo = parseSPS(sps);

            sps.assign(data.begin(), data.end());

        }
        //0x68
        else if ( nal_type == (NAL_TYPE_PPS) ) {
            fprintf(stdout, " processing: 0x%x (PPS)\n",nal_bit);

            pps.assign(data.begin(), data.end());

            if (sps.empty()) {
                // A sequence header built without an SPS would be invalid;
                // keep the PPS and wait for the SPS/PPS pair to repeat.
                fprintf(stderr, " PPS received before any SPS: sequence header not written\n");
            } else {
                mFLVWritter.writeVideoSequenceHeader(sps, pps, spsInfo);
                mFLVWritter.flushToFD(fd);
            }

        }
        //0x65, 0x61, 0x41
        else if ( nal_type == NAL_TYPE_CSIDRP ||
                  nal_type == NAL_TYPE_CSNIDRP ) {
            //convert annexb to AVCC (length before the NAL structure)
            mFLVWritter.writeUInt32( data.size(), &nalBuffer );
            nalBuffer.insert(nalBuffer.end(), data.begin(), data.end());

        } else if (nal_type == (NAL_TYPE_SEI)) {
            fprintf(stdout, " ignoring SEI bit: 0x%x type: 0x%x\n",nal_bit, nal_type);
        } else {
            // nal_bit type not implemented...
            // NOTE(review): exit() does not unwind the stack, so a writer with
            // automatic storage is not destroyed and a buffered frame is lost.
            fprintf(stdout, "Error: unknown NAL bit: 0x%x type: 0x%x\n",nal_bit, nal_type);
            exit(-1);
        }

    }
};

结论

经过测试，用这个新实现生成的视频可以正常播放。

我现在唯一的问题与 PTS(显示时间戳)和 DTS(解码时间戳)有关。我不知道如何从流中提取它们，也不知道如何用它们来计算帧的时间戳。

当前的实现使用硬编码比率 1000/30(例如 30fps)作为增加时间戳的基础。

时间戳以毫秒为单位。

关于c++ - 封装 H.264 的 C/C++ FLV 多路复用器未按预期工作，我们在Stack Overflow上找到一个类似的问题： https://stackoverflow.com/questions/56894728/

文章推荐： c++ - 像打包一样获取聚合的大小

文章推荐： c++ - 渲染到帧缓冲区/纹理显示为空

文章推荐： c++ - 编译错误 'set_ssl_context_callback' : on Windows

文章推荐： c++ - 优化调用函数指针数组

javascript - (未)旋转矩形上的点
我找到了 this excellent question and answer它以 x/y(加上 center x/y 和 degrees/radians)开始并计算旋转- 到 x'/y'。这个计算很
.net - 为什么非法跨线程*未*被检测到？
全部: 我已经创建了一个 Windows 窗体和一个按钮。在另一个线程中，我试图更改按钮的文本，但它崩溃了；但是如果我尝试更改按钮的颜色，它肯定会成功。我认为如果您更改任何 Windows 窗体控件属
c - (未)签名短整型 (C)
本网站的另一个问题已证实，C 中没有缩写的字面后缀，并且可以执行以下操作: short Number = (short)1; 但是转换它和不这样做有什么区别: short Number = 1; 您使
SQL:从表中获取最新的(未)订阅操作
我有下表: ID (int) EMAIL (varchar(50)) CAMPAIGNID (int) isSubscribe (bit) isActionByUser (bit) 此表存储了用户对事
javascript - 如何复制手动(未)选中复选框的状态？
也就是说，无需触发Javascript事件即可改变的属性，如何保留我手动选中或取消选中的复选框的状态，然后复制到另一个地方？运行下面的代码片段并选中或取消选中其中的一些，然后点击“复制”: $('#
c++ - 可以在不取消引用的情况下增加指针仍然是段错误或具有其他(未)定义的肮脏吗？
我在网上找到的所有关于递增指针导致段错误的示例都涉及指针的取消引用 - 如果我只想递增它(例如在 for 循环的末尾)并且我不在乎它是否最终进入无效内存，因为我不会再使用它。例如，在这个程序中，每次迭
java - 如何获取用于记录的(未)编码消息
我有一个 Spring MVC REST 服务，它使用 XStream 将消息与 XML 相互转换。有什么方法可以将请求和响应中的 xml(即正文)打印到普通的 log4j 记录器？在 Contr
git - 当我需要其他分支的一些代码时如何为功能创建分支(未 merge 到开发分支中)
做我的任务有一个很大的挑战，那就是做相互依赖的任务我在这张照片中说的。假设我们有两个任务 A 和 B，执行子任务 A1、A2 和 B1、B2，假设任务 B 依赖于 A。要理想地执行任务 B，您应该执
delphi - 如何从库的角度处理 COM(未)初始化？
通过阅读该网站上的几个答案，我了解到 CoInitialize(Ex) should be called by the creator of a thread 。然后，在该线程中运行的任何代码都可以使
java - FirebaseListAdapter 未 populateView 未被调用
这个问题已经困扰我一段时间了。我以前从未真正使用过 ListViews，也没有使用过 FirebaseListAdapters。我想做的就是通过显示 id 和用户位置来启动列表的基础，但由于某种原因，
java - 检查(未)检查异常 (Java)
我很难解释这两个(看似简单)句子的含义: “受检异常由编译器在编译时检查” 这是什么意思？编译器检查是否捕获了所有已检查的异常(在代码中抛出)？ “未经检查的异常在运行时检查，而不是编译时” 这句话中
python - 如果在值中*未*找到特定的迭代子字符串，如何返回值？
我有一个包含排除子字符串的文本文件，我想迭代该文件以检查并返回不带排除子字符串的输入项。这里我使用 python 2.4，因此下面的代码可以实现此目的，因为 with open 和 any 不起作用
java - 对于(未)验证的请求绕过 @Cacheable
Spring 的缓存框架能否了解请求上下文的身份验证状态，或者更容易推出自己的缓存解决方案？最佳答案尽管我发现这个用例 super 奇怪，但您可以为几乎任何与 SpEL 配合使用的内容设置缓存条件
c++ - 指针/整数算术(未)定义的行为
我有以下函数模板: template HeldAs* duplicate(MostDerived *original, HeldAs *held) { // error checking omi
android - 如果您的应用程序具有设备管理员/设备所有者权限(未 Root )，如何杀死另一个应用程序
如果我的应用程序具有设备管理员/设备所有者权限(未获得 root 权限)，我如何才能从我的应用程序中终止(或阻止启动)另一个应用程序？最佳答案设备所有者可以阻止应用程序: DevicePolicy
tridion - 组件中的 XSLT 被(未)编码
非常简单的问题，但我似乎无法让它正常工作。我有一个组件，其中有一些 XSLT(用于导航)。它通过 XSLT TBB 使用 XSLT Mediator 发布。发布后
jquery - Droppable 创建(未)嵌套对象 - 但可拖动的位置发生变化
我正在将一个对象拖动到一个可拖放的对象内，该对象也是可拖动的。放置对象后，它会嵌套在可放置对象内。同样，如果我将对象拖到可放置的外部，它就不再嵌套。但是，如果我经常拖入和拖出可放置对象，则可拖动对象
jquery - 如何检测 Angular 中是否*未*单击指令元素
我正在尝试为按钮和弹出窗口等多个指令实现“取消选择”功能。也就是说，我希望当用户单击不属于指令模板一部分的元素时触发我的函数。目前，我正在使用以下 JQuery 代码: $('body').click
javascript - 加载 CSS 未 100% 工作
我从 this question 得到了下面的代码，该脚本用于在 Google tasks 上更改 iframe[src="about:blank"] 内的 CSS使用 Chrome 扩展 Tempe
java - 未 stub 调用的 native 类型的返回值
我有一些 @Mock 对象，但没有指定在该对象上调用方法的返回值。该方法返回 int (不是 Integer)。我很惊讶地发现 Mockito 没有抛出 NPE 并返回 0。这是预期的行为吗？例如:

塔克拉玛干

个人简介

我是一名优秀的程序员,十分优秀！

作者热门文章

滴滴打车优惠券免费领取

全站热门文章

首页

博学

6Ren·AI

商城

c++ - 封装 H.264 的 C/C++ FLV 多路复用器未按预期工作