gpt4 book ai didi

h.264 - 从H.264 NALU获取视频的宽度/高度

转载 作者:行者123 更新时间:2023-12-04 03:03:27 35 4
gpt4 key购买 nike

我已经在NALU(AVC解码器配置记录)中获得了SPS,并尝试从中解析视频的宽度/高度。

67 64 00 15 ac c8 60 20 09 6c 04 40 00 00 03 00 40 00 00 07 a3 c5 8b 67 80 

下面是我用来解析SPS的代码,但得到的值是错误的:
pic_width_in_mbs_minus1为5,pic_height_in_map_units_minus1为1,
而实际上视频的分辨率是512 x 288像素。
// Bit-level reader for an H.264 sequence parameter set (SPS).
// Parse() walks the SPS fields in order up to vui_parameters_present_flag;
// every value is read into a local variable and then discarded, so nothing
// is stored on the object or returned to the caller.
typedef struct _SequenceParameterSet
{
private:
// First byte of the buffer being parsed (set by Parse()).
const unsigned char * m_pStart;
// Length of the buffer in bytes (set by Parse()).
unsigned short m_nLength;
// Index of the next bit to read, counted from the start of m_pStart.
int m_nCurrentBit;

// Returns the next bit of the buffer (most significant bit of each byte
// first) and advances the read position by one bit.
unsigned int ReadBit()
{
// NOTE(review): "<=" lets m_nCurrentBit == m_nLength * 8 through, in which
// case nIndex below equals m_nLength, i.e. one byte past the buffer.
ATLASSERT(m_nCurrentBit <= m_nLength * 8);
int nIndex = m_nCurrentBit / 8;
// nOffset is the 1-based bit position inside the byte, so the shift
// below is 7 for the first bit and 0 for the last.
int nOffset = m_nCurrentBit % 8 + 1;

m_nCurrentBit ++;
return (m_pStart[nIndex] >> (8-nOffset)) & 0x01;
}

// Reads n bits and returns them as one value; the first bit read becomes
// the most significant bit of the result.
unsigned int ReadBits(int n)
{
int r = 0;
int i;
for (i = 0; i < n; i++)
{
r |= ( ReadBit() << ( n - i - 1 ) );
}
return r;
}

// Decodes one unsigned Exp-Golomb value: counts zero bits until a one bit
// is read (the count is capped at 32), reads that many further bits and
// adds (2^count - 1).
unsigned int ReadExponentialGolombCode()
{
int r = 0;
int i = 0;

while( (ReadBit() == 0) && (i < 32) )
{
i++;
}
r = ReadBits(i);
r += (1 << i) - 1;
return r;
}


// Decodes one signed Exp-Golomb value from the unsigned code k:
// odd k maps to +(k+1)/2, even k maps to -(k/2).
unsigned int ReadSE()
{
int r = ReadExponentialGolombCode();
if (r & 0x01)
{
r = (r+1)/2;
}
else
{
r = -(r/2);
}
return r;
}
public:

// Reads the SPS fields from pStart (nLen bytes), starting with profile_idc.
// All parsed values live in locals only; the function has no output.
void Parse(const unsigned char * pStart, unsigned short nLen)
{
m_pStart = pStart;
m_nLength = nLen;
m_nCurrentBit = 0;
int profile_idc = ReadBits(8);
int constraint_set0_flag = ReadBit();
int constraint_set1_flag = ReadBit();
int constraint_set2_flag = ReadBit();
int constraint_set3_flag = ReadBit();
int constraint_set4_flag = ReadBit();
int constraint_set5_flag = ReadBit();
int reserved_zero_2bits = ReadBits(2);
int level_idc = ReadBits(8);
int seq_parameter_set_id = ReadExponentialGolombCode();

// NOTE(review): 144 looks like a typo for 244; the answer's version of this
// check further down the page uses 244 and a longer profile list — confirm
// against the H.264 specification.
if( profile_idc == 100 || profile_idc == 110 ||
profile_idc == 122 || profile_idc == 144 )
{
int chroma_format_idc = ReadExponentialGolombCode();
if( chroma_format_idc == 3 )
{
int residual_colour_transform_flag = ReadBit();
}
int bit_depth_luma_minus8 = ReadExponentialGolombCode();
int bit_depth_chroma_minus8 = ReadExponentialGolombCode();
int qpprime_y_zero_transform_bypass_flag = ReadBit();
int seq_scaling_matrix_present_flag = ReadBit();
if( seq_scaling_matrix_present_flag )
{
for( int i = 0; i < 8; i++ )
{
int seq_scaling_list_present_flag = ReadBit();
if( seq_scaling_list_present_flag )
{
// NOTE(review): the scaling-list reader is commented out, so when a list is
// flagged as present its bits are never consumed and every field read after
// this point comes from the wrong bit position.
/*
if( i < 6 )
{
read_scaling_list( b, sps->ScalingList4x4[ i ], 16,
sps->UseDefaultScalingMatrix4x4Flag[ i ]);
}
else
{
read_scaling_list( b, sps->ScalingList8x8[ i - 6 ], 64,
sps->UseDefaultScalingMatrix8x8Flag[ i - 6 ] );
}
*/
}
}
}
}
int log2_max_frame_num_minus4 = ReadExponentialGolombCode();
int pic_order_cnt_type = ReadExponentialGolombCode();
if( pic_order_cnt_type == 0 )
{
int log2_max_pic_order_cnt_lsb_minus4 = ReadExponentialGolombCode();
}
else if( pic_order_cnt_type == 1 )
{
int delta_pic_order_always_zero_flag = ReadBit();
int offset_for_non_ref_pic = ReadSE();
int offset_for_top_to_bottom_field = ReadSE();
int num_ref_frames_in_pic_order_cnt_cycle = ReadExponentialGolombCode();
for( int i = 0; i < num_ref_frames_in_pic_order_cnt_cycle; i++ )
{
// Value is read only to advance the bit position.
ReadSE();
//sps->offset_for_ref_frame[ i ] = ReadSE();
}
}
int num_ref_frames = ReadExponentialGolombCode();
int gaps_in_frame_num_value_allowed_flag = ReadBit();
// Picture size fields: width in macroblocks minus 1, height in map units
// minus 1 (as the field names say).
int pic_width_in_mbs_minus1 = ReadExponentialGolombCode();
int pic_height_in_map_units_minus1 = ReadExponentialGolombCode();
int frame_mbs_only_flag = ReadBit();
if( !frame_mbs_only_flag )
{
int mb_adaptive_frame_field_flag = ReadBit();
}
int direct_8x8_inference_flag = ReadBit();
int frame_cropping_flag = ReadBit();
if( frame_cropping_flag )
{
int frame_crop_left_offset = ReadExponentialGolombCode();
int frame_crop_right_offset = ReadExponentialGolombCode();
int frame_crop_top_offset = ReadExponentialGolombCode();
int frame_crop_bottom_offset = ReadExponentialGolombCode();
}
int vui_parameters_present_flag = ReadBit();

// NOTE(review): this only increments the local parameter copy and has no
// effect on the caller or on the reader state.
pStart++;
}
}SequenceParameterSet, *LPSequenceParameterSet;

这是我的代码解析SPS,但是得到了错误的值。 pic_width_in_mbs_minus1为5,pic_height_in_map_units_minus1为1。实际上,视频为512 X 288px

谢谢

最佳答案

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

/* Bit-reader state shared by ReadBit() and its callers; initialised by Parse(). */
/* First byte of the buffer being read. */
const unsigned char * m_pStart;
/* Number of bytes available at m_pStart. */
unsigned short m_nLength;
/* Index of the next bit to read, counted from the start of m_pStart. */
int m_nCurrentBit;

/*
 * Read the next bit of the current buffer, most significant bit of each
 * byte first, and advance the read position by one bit.
 *
 * H.264 NAL unit payloads carry emulation_prevention_three_byte markers:
 * a 0x03 byte inserted after every two consecutive 0x00 bytes. Those bytes
 * are not part of the parsed syntax, so they are stepped over here whenever
 * the reader arrives at the start of one.
 *
 * Returns 0 or 1. Reading past the end of the buffer trips the assertion
 * in debug builds and yields 0 when assertions are compiled out.
 */
unsigned int ReadBit()
{
    int nIndex;
    int nShift;

    /* Only a byte-aligned position can be the start of a marker byte. */
    if ((m_nCurrentBit % 8) == 0)
    {
        nIndex = m_nCurrentBit / 8;
        if (nIndex >= 2 && nIndex < m_nLength &&
            m_pStart[nIndex - 2] == 0x00 &&
            m_pStart[nIndex - 1] == 0x00 &&
            m_pStart[nIndex] == 0x03)
        {
            m_nCurrentBit += 8;
        }
    }

    /* Strictly less-than: bit index m_nLength * 8 is already out of range. */
    assert(m_nCurrentBit < m_nLength * 8);
    if (m_nCurrentBit >= m_nLength * 8)
    {
        return 0;
    }

    nIndex = m_nCurrentBit / 8;
    nShift = 7 - (m_nCurrentBit % 8);

    m_nCurrentBit++;
    return (m_pStart[nIndex] >> nShift) & 0x01;
}

/*
 * Read n consecutive bits and pack them into a single value.
 * The first bit read ends up as the most significant of the n result bits.
 * For n <= 0 no bit is consumed and 0 is returned.
 */
unsigned int ReadBits(int n)
{
    int value = 0;
    int remaining = n;

    /* 'remaining' doubles as the shift position of the bit about to be read. */
    while (remaining > 0)
    {
        remaining--;
        value |= ( ReadBit() << remaining );
    }
    return value;
}

/*
 * Decode one unsigned Exp-Golomb value, ue(v).
 *
 * The code consists of N zero bits, a one bit, and N suffix bits; the
 * decoded value is (2^N - 1) + suffix. A valid code has at most 31 leading
 * zeros, so 32 zero bits in a row is treated as malformed input and
 * reported as the all-ones value (~0u) without reading any further.
 *
 * All arithmetic is unsigned so that N == 31 does not overflow a signed
 * shift.
 */
unsigned int ReadExponentialGolombCode()
{
    unsigned int leadingZeros = 0;
    unsigned int suffix;

    while (leadingZeros < 32 && ReadBit() == 0)
    {
        leadingZeros++;
    }

    if (leadingZeros >= 32)
    {
        return ~0u;
    }

    suffix = ReadBits((int)leadingZeros);
    return ((1u << leadingZeros) - 1u) + suffix;
}

/*
 * Decode one signed Exp-Golomb value.
 *
 * The unsigned code k is mapped to a signed value: odd k gives +(k + 1) / 2,
 * even k gives -(k / 2). The signed result is returned through the
 * function's unsigned return type; callers assign it back to an int.
 */
unsigned int ReadSE()
{
    int code = ReadExponentialGolombCode();

    return (code & 0x01) ? (code + 1) / 2 : -(code / 2);
}

/*
 * Consume one scaling list of sizeOfScalingList entries (16 or 64) without
 * storing it. The values are not needed for the picture size, but the
 * bits must be read so the following fields are parsed at the right place.
 */
static void SkipScalingList(int sizeOfScalingList)
{
    int lastScale = 8;
    int nextScale = 8;
    int j;

    for (j = 0; j < sizeOfScalingList; j++)
    {
        /* Once nextScale becomes 0 the rest of the list is not coded. */
        if (nextScale != 0)
        {
            int delta_scale = ReadSE();
            nextScale = (lastScale + delta_scale + 256) % 256;
        }
        lastScale = (nextScale == 0) ? lastScale : nextScale;
    }
}

/*
 * Parse an H.264 sequence parameter set (SPS) and print the cropped luma
 * picture size.
 *
 * pStart  first SPS byte after the one-byte NAL unit header (the first
 *         field read is profile_idc).
 * nLen    number of readable bytes at pStart.
 *
 * Prints "WxH = <width>x<height>" to stdout; nothing is returned. Parsing
 * stops after vui_parameters_present_flag. Fields that do not influence
 * the picture size are read only to advance the bit position.
 */
void Parse(const unsigned char * pStart, unsigned short nLen)
{
    /* Values inferred when the corresponding syntax elements are absent. */
    int chroma_format_idc = 1;
    int separate_colour_plane_flag = 0;
    int frame_crop_left_offset = 0;
    int frame_crop_right_offset = 0;
    int frame_crop_top_offset = 0;
    int frame_crop_bottom_offset = 0;

    m_pStart = pStart;
    m_nLength = nLen;
    m_nCurrentBit = 0;

    int profile_idc = ReadBits(8);
    ReadBits(8);                    /* constraint_set0..5_flag + reserved_zero_2bits */
    ReadBits(8);                    /* level_idc */
    ReadExponentialGolombCode();    /* seq_parameter_set_id */

    /* Profiles whose SPS carries the chroma / bit-depth / scaling fields. */
    if( profile_idc == 100 || profile_idc == 110 ||
        profile_idc == 122 || profile_idc == 244 ||
        profile_idc == 44  || profile_idc == 83  ||
        profile_idc == 86  || profile_idc == 118 ||
        profile_idc == 128 || profile_idc == 138 ||
        profile_idc == 139 || profile_idc == 134 ||
        profile_idc == 135 )
    {
        chroma_format_idc = ReadExponentialGolombCode();
        if( chroma_format_idc == 3 )
        {
            separate_colour_plane_flag = ReadBit();
        }
        ReadExponentialGolombCode();    /* bit_depth_luma_minus8 */
        ReadExponentialGolombCode();    /* bit_depth_chroma_minus8 */
        ReadBit();                      /* qpprime_y_zero_transform_bypass_flag */

        if( ReadBit() )                 /* seq_scaling_matrix_present_flag */
        {
            /* 4:4:4 streams carry 12 lists (6 of 4x4, 6 of 8x8), others 8. */
            int listCount = (chroma_format_idc != 3) ? 8 : 12;
            int i;
            for( i = 0; i < listCount; i++ )
            {
                if( ReadBit() )         /* seq_scaling_list_present_flag[i] */
                {
                    SkipScalingList((i < 6) ? 16 : 64);
                }
            }
        }
    }

    ReadExponentialGolombCode();        /* log2_max_frame_num_minus4 */
    int pic_order_cnt_type = ReadExponentialGolombCode();
    if( pic_order_cnt_type == 0 )
    {
        ReadExponentialGolombCode();    /* log2_max_pic_order_cnt_lsb_minus4 */
    }
    else if( pic_order_cnt_type == 1 )
    {
        ReadBit();                      /* delta_pic_order_always_zero_flag */
        ReadSE();                       /* offset_for_non_ref_pic */
        ReadSE();                       /* offset_for_top_to_bottom_field */
        int num_ref_frames_in_pic_order_cnt_cycle = ReadExponentialGolombCode();
        int i;
        for( i = 0; i < num_ref_frames_in_pic_order_cnt_cycle; i++ )
        {
            ReadSE();                   /* offset_for_ref_frame[i] */
        }
    }
    ReadExponentialGolombCode();        /* max_num_ref_frames */
    ReadBit();                          /* gaps_in_frame_num_value_allowed_flag */

    int pic_width_in_mbs_minus1 = ReadExponentialGolombCode();
    int pic_height_in_map_units_minus1 = ReadExponentialGolombCode();
    int frame_mbs_only_flag = ReadBit();
    if( !frame_mbs_only_flag )
    {
        ReadBit();                      /* mb_adaptive_frame_field_flag */
    }
    ReadBit();                          /* direct_8x8_inference_flag */
    if( ReadBit() )                     /* frame_cropping_flag */
    {
        frame_crop_left_offset = ReadExponentialGolombCode();
        frame_crop_right_offset = ReadExponentialGolombCode();
        frame_crop_top_offset = ReadExponentialGolombCode();
        frame_crop_bottom_offset = ReadExponentialGolombCode();
    }
    ReadBit();                          /* vui_parameters_present_flag */

    /*
     * Crop offsets are expressed in crop units, which depend on the chroma
     * sampling and on whether the stream is frame-only:
     *   monochrome or separate planes: 1 x (2 - frame_mbs_only_flag)
     *   otherwise: SubWidthC x SubHeightC * (2 - frame_mbs_only_flag)
     * with SubWidthC/SubHeightC = 2/2 (4:2:0), 2/1 (4:2:2), 1/1 (4:4:4).
     */
    int frame_height_factor = 2 - frame_mbs_only_flag;
    int chroma_array_type = separate_colour_plane_flag ? 0 : chroma_format_idc;
    int crop_unit_x = 1;
    int crop_unit_y = frame_height_factor;
    if( chroma_array_type != 0 )
    {
        crop_unit_x = (chroma_array_type == 3) ? 1 : 2;
        crop_unit_y = ((chroma_array_type == 1) ? 2 : 1) * frame_height_factor;
    }

    /* Left/right offsets shrink the width, top/bottom offsets the height. */
    int Width = ((pic_width_in_mbs_minus1 + 1) * 16)
              - crop_unit_x * (frame_crop_left_offset + frame_crop_right_offset);
    int Height = (frame_height_factor * (pic_height_in_map_units_minus1 + 1) * 16)
               - crop_unit_y * (frame_crop_top_offset + frame_crop_bottom_offset);

    printf("\n\nWxH = %dx%d\n\n",Width,Height);
}

/*
 * Load an H.264 Annex B byte-stream file, locate the first sequence
 * parameter set (SPS) NAL unit and hand its payload to Parse(), which
 * prints the picture size.
 *
 * cpFileName  path of the file to read.
 * ipWidth     unused: Parse() only prints the size and returns nothing,
 * ipHeight    so these outputs are left untouched.
 *
 * Errors (unreadable file, allocation failure) are reported on stderr and
 * the function returns without parsing.
 */
void FindJPGFileResolution(char *cpFileName, int *ipWidth, int *ipHeight)
{
    FILE *fp;
    long len;
    long i;
    unsigned char *ucpInBuffer;

    (void)ipWidth;
    (void)ipHeight;

    if (cpFileName == NULL)
    {
        fprintf(stderr, "No input file name given\n");
        return;
    }

    fp = fopen(cpFileName,"rb");
    if (fp == NULL)
    {
        perror(cpFileName);
        return;
    }

    if (fseek(fp,0,SEEK_END) != 0 ||
        (len = ftell(fp)) < 0 ||
        fseek(fp,0,SEEK_SET) != 0)
    {
        perror(cpFileName);
        fclose(fp);
        return;
    }

    ucpInBuffer = malloc((size_t)len + 1);
    if (ucpInBuffer == NULL)
    {
        perror("malloc");
        fclose(fp);
        return;
    }

    if (fread(ucpInBuffer,1,(size_t)len,fp) != (size_t)len)
    {
        perror(cpFileName);
        fclose(fp);
        free(ucpInBuffer);
        return;
    }
    fclose(fp);

    printf("\n\nBuffer size %ld\n", len);

    /*
     * Look for a start code (00 00 01, which is also the tail of the
     * four-byte form 00 00 00 01) followed by a NAL header whose
     * nal_unit_type (low five bits) is 7 = SPS. The loop bound keeps the
     * header byte at i + 3 inside the buffer.
     */
    for (i = 0; i + 3 < len; i++)
    {
        if ((ucpInBuffer[i] == 0x00) && (ucpInBuffer[i+1] == 0x00) &&
            (ucpInBuffer[i+2] == 0x01))
        {
            unsigned char nalHeader = ucpInBuffer[i+3];

            if ((nalHeader & 0x80) == 0 && (nalHeader & 0x1F) == 0x07)
            {
                long payload = i + 4;
                long remaining = len - payload;

                if (remaining > 0)
                {
                    /* Parse() takes the length as unsigned short. */
                    if (remaining > 0xFFFF)
                    {
                        remaining = 0xFFFF;
                    }
                    Parse(&ucpInBuffer[payload], (unsigned short)remaining);
                }
                break;
            }
        }
    }

    free(ucpInBuffer);
    return;
}


/*
 * Entry point: print the resolution of an H.264 elementary stream.
 *
 * The file path is taken from the first command-line argument when one is
 * given; otherwise the built-in default path is used.
 */
int main(int argc, char **argv)
{
    int iHeight=0, iWidth=0;
    /* Kept in a writable array because the callee takes a non-const char *. */
    char cpDefaultFileName[] = "/home/pankaj/pankil/SameSystem_H264_1920x320.264";
    char *cpFileName = (argc > 1) ? argv[1] : cpDefaultFileName;

    FindJPGFileResolution(cpFileName, &iWidth, &iHeight);
    return 0;
}

试试这段代码。它已经可以成功运行,我用gcc编译器测试过。
只需设置好您的文件路径,然后编译并运行即可。

关于h.264 - 从H.264 NALU获取视频的宽度/高度,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/12018535/

35 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com