一、背景
最近搞Intel平台的多路复用视频解码,发现某些视频会报解码不兼容参数的报错,分析MFX的源码(如下图所示),解码实例通过对比新旧的SPS部分参数来判断是否是相同的视频,当前应用只判断了分辨率,所以当遇到sps_max_dec_pic_buffering数量不一致的码流时,就会导致解码器报错。 因此应用业务上需要做SPS解析,对比关键参数以对解码器做适当的复位处理。
二、SPS语法解析
SPS(sequence parameter set)图像序列参数集描述了视频序列级别的编码参数,主要作用有如下几点:
1、描述视频序列的宽度、高度、帧率、颜色空间等基本信息,帮助解码器正确解码视频序列;
2、定义帧内预测模式和帧间模式的参数,包括用于帧间参考的帧间跨度、缩放列表和运动向量参考等参数;
3、指示编码器如何将编码数据划分为CTU(Coding Tree Unit,编码树单元),以及如何对CTU进行编码;
下图为profile_tier_level语义:
如上图所示,为所需参数在SPS中的位置,以及参数的语法。在表中可以看到第二列,每个参数后面都跟着read_bits(n)的描述信息,该信息指定了对应参数的解析方法。read_bits(n)从比特流中读取接下来的n个比特,并且将比特流指针向前移动n个比特。当n=0时,read_bits(n)的返回值为0,并且不移动比特流指针。具体含义如下:
1、ae(v):上下文自适应算术熵编码的语法元素。v表示所占比特数是可变的,具体由解析过程确定。
2、b(8): 任意形式的8比特字节。该描述符的解析过程通过函数read_bits(8)的返回值来规定。
3、f(n): n位固定模式比特串(由左至右),左位在先,该描述符的解析过程通过函数read_bits(n)的返回值来规定。
4、i(n): 使用n比特的有符号整数。如果n=“v”,所占比特数由其他语法元素来确定。解析过程由函数read_bits(n)的返回值规定,该返回值用最高有效位在前的2的补码表示。
5、se(v): 有符号整数0阶指数哥伦布编码的语法元素,左位在先。
6、st(v): 以null结尾的字符串,编码为ISO/IEC 10646中规定的通用编码字符集(UCS)传输格式-8(UTF-8)字符。解析过程如下所示:st(v)从比特流中的字节对齐位置开始,从比特流读取并返回一系列字节,从当前位置开始,一直到但不包括等于0x00的下一个字节对齐字节,并将比特流指针前进(stringLength+1)*8个比特位置,其中,stringLength等于返回的字节数。
7、u(n): n位无符号整数。在语法表中,如果n=“v”,所占比特数由其他语法元素来确定。解析过程由函数read_bits(n)的返回值规定,该返回值用最高有效位在前的二进制表示。
8、ue(v): 无符号整数0阶指数哥伦布编码的语法元素,左位在先。
从SPS语法表中可以看到,SPS元素只使用了u(n)和ue(v)编码方式。因此,根据表格所示,具体的语法解析代码如下:
1 #include <stdint.h>
2 #include <stdio.h>
3 #include <stdlib.h>
4
/* Bit-reader state shared by every read_bits() call in this file. */
static int held_bits_num; /* unread bits left in the last byte fetched (0..7 between calls) */
static int held_bits;     /* value of the last byte fetched; its low held_bits_num bits are unread */

/*
 * Read the next `bitnum` bits (most significant bit first) from the stream.
 *
 * pp     - in/out cursor; on return *pp points just past the last byte that
 *          has been fetched, including a byte that is only partly consumed.
 * bitnum - number of bits to read. Values <= 0 consume nothing and return 0.
 *
 * Returns the bits as an integer. Only the low 32 bits are kept, so a wider
 * request (e.g. a 43-bit reserved field) is consumed in full but its value is
 * truncated; such requests are only meaningful for skipping.
 *
 * The leftover bits of a partly consumed byte live in the file-level
 * held_bits / held_bits_num variables, so this function is not reentrant and
 * a new stream must start with held_bits_num == 0.
 */
int read_bits(uint8_t **pp, int bitnum)
{
    uint8_t *cursor = *pp;
    uint32_t value = 0;

    while (bitnum > 0) {
        /* Fetch a fresh byte only when the previous one is exhausted. */
        if (held_bits_num == 0) {
            held_bits = *cursor++;
            held_bits_num = 8;
        }

        /* Take as many bits as possible from the current byte (at most 8). */
        int take = bitnum < held_bits_num ? bitnum : held_bits_num;
        uint32_t chunk = ((uint32_t)held_bits >> (held_bits_num - take))
                         & ((1u << take) - 1u);

        value = (value << take) | chunk;
        held_bits_num -= take;
        bitnum -= take;
    }

    *pp = cursor;
    return (int)value;
}
46
/*
 * Decode one ue(v) element (unsigned 0-th order Exp-Golomb code).
 *
 * The code consists of N zero bits, a single 1 bit, and then N suffix bits;
 * the decoded value is (2^N - 1) + suffix.
 *
 * pp - in/out stream cursor, advanced through read_bits().
 *
 * Returns the decoded value, or -1 when the prefix has more than 30 leading
 * zeros. Such a value would not fit in a non-negative int, and an unbounded
 * prefix on a corrupt stream would otherwise run past the end of the buffer.
 */
int read_bits_ue(uint8_t **pp)
{
    enum { MAX_LEADING_ZEROS = 30 };
    int leading_zeros = 0;
    unsigned suffix = 0;

    while (read_bits(pp, 1) == 0) {
        if (++leading_zeros > MAX_LEADING_ZEROS)
            return -1;
    }

    /* A zero-length suffix carries no bits, so there is nothing to read. */
    if (leading_zeros > 0)
        suffix = (unsigned)read_bits(pp, leading_zeros);

    return (int)(((1u << leading_zeros) - 1u) + suffix);
}
58
/*
 * Consume `count` bits without using their value.
 *
 * read_bits() returns an int, so fields wider than 32 bits cannot be
 * represented anyway; skipping in steps of at most one byte keeps every
 * individual request small.
 */
static void hevc_skip_bits(uint8_t **p, int count)
{
    while (count > 0) {
        int step = count < 8 ? count : 8;
        read_bits(p, step);
        count -= step;
    }
}

/*
 * Parse one 88-bit profile block of profile_tier_level(). The general_* and
 * sub_layer_*[i] fields share this layout:
 *
 *    2 bits  profile_space
 *    1 bit   tier_flag
 *    5 bits  profile_idc
 *   32 bits  profile_compatibility_flag[0..31]
 *    4 bits  progressive_source / interlaced_source / non_packed_constraint /
 *            frame_only_constraint flags
 *   43 bits  profile-dependent constraint flags and reserved zero bits
 *            (every branch of the syntax occupies exactly 43 bits)
 *    1 bit   inbld_flag or reserved_zero_bit
 *
 * Only profile_idc is needed here, so the remaining fields are skipped.
 * Returns profile_idc.
 */
static int hevc_parse_profile_block(uint8_t **p)
{
    int profile_idc;

    read_bits(p, 2);               //profile_space
    read_bits(p, 1);               //tier_flag
    profile_idc = read_bits(p, 5); //profile_idc
    hevc_skip_bits(p, 32);         //profile_compatibility_flag[0..31]
    read_bits(p, 1);               //progressive_source_flag
    read_bits(p, 1);               //interlaced_source_flag
    read_bits(p, 1);               //non_packed_constraint_flag
    read_bits(p, 1);               //frame_only_constraint_flag
    hevc_skip_bits(p, 43);         //constraint flags + reserved zero bits
    read_bits(p, 1);               //inbld_flag / reserved_zero_bit

    return profile_idc;
}

/*
 * Parse the leading part of an HEVC sequence parameter set and print the
 * fields relevant for decoder compatibility checks (profile, level, chroma
 * format, picture size, bit depths and DPB size).
 *
 * pSps - first byte of the SPS payload, i.e. the byte holding
 *        sps_video_parameter_set_id. The caller is expected to have skipped
 *        the NAL unit header and removed emulation prevention bytes.
 *
 * Returns 0 on success, -1 when pSps is NULL or sps_max_sub_layers_minus1 is
 * outside the 0..6 range the arrays below are sized for.
 *
 * NOTE: parsing goes through read_bits(), whose leftover-bit state is kept in
 * file-level variables; that state must be empty (byte aligned) on entry.
 */
int hevc_analysis_sps(uint8_t *pSps)
{
    enum { MAX_SUB_LAYERS_MINUS1 = 6 };
    const uint8_t profilePresentFlag = 1; //always 1 for the profile_tier_level() inside an SPS
    uint8_t **p = &pSps;
    int32_t sps_max_sub_layers; //holds sps_max_sub_layers_minus1
    int32_t general_profile_idc = 0;
    uint8_t general_level_idc;
    uint8_t sub_layer_profile_present_flag[8] = {0};
    uint8_t sub_layer_level_present_flag[8] = {0};
    int32_t chroma_format_idc;
    int32_t pic_width_in_luma_samples, pic_height_in_luma_samples;
    int32_t bit_depth_luma_minus8, bit_depth_chroma_minus8;
    uint8_t sps_sub_layer_ordering_info_present_flag;
    int32_t sps_max_dec_pic_buffering_minus1[MAX_SUB_LAYERS_MINUS1 + 1] = {0};
    int first_signalled;

    if (pSps == NULL)
        return -1;

    read_bits(p, 4);                      //sps_video_parameter_set_id
    sps_max_sub_layers = read_bits(p, 3); //sps_max_sub_layers_minus1
    read_bits(p, 1);                      //sps_temporal_id_nesting_flag

    if (sps_max_sub_layers > MAX_SUB_LAYERS_MINUS1) {
        fprintf(stderr, "invalid sps_max_sub_layers_minus1: %d\n", (int)sps_max_sub_layers);
        return -1;
    }

    //profile_tier_level start
    if (profilePresentFlag)
        general_profile_idc = hevc_parse_profile_block(p);
    general_level_idc = (uint8_t)read_bits(p, 8);

    //The presence flags must be stored: they decide below which
    //sub-layer fields actually exist in the stream.
    for (int i = 0; i < sps_max_sub_layers; i++) {
        sub_layer_profile_present_flag[i] = (uint8_t)read_bits(p, 1);
        sub_layer_level_present_flag[i] = (uint8_t)read_bits(p, 1);
    }
    if (sps_max_sub_layers > 0) {
        for (int i = sps_max_sub_layers; i < 8; i++)
            read_bits(p, 2); //reserved_zero_2bits[i]
    }
    for (int i = 0; i < sps_max_sub_layers; i++) {
        if (sub_layer_profile_present_flag[i])
            hevc_parse_profile_block(p); //sub_layer profile fields, value unused
        if (sub_layer_level_present_flag[i])
            read_bits(p, 8); //sub_layer_level_idc[i]
    }
    //profile_tier_level end

    read_bits_ue(p); //sps_seq_parameter_set_id
    chroma_format_idc = read_bits_ue(p);
    if (chroma_format_idc == 3)
        read_bits(p, 1); //separate_colour_plane_flag
    pic_width_in_luma_samples = read_bits_ue(p);
    pic_height_in_luma_samples = read_bits_ue(p);
    if (read_bits(p, 1)) { //conformance_window_flag
        read_bits_ue(p); //conf_win_left_offset
        read_bits_ue(p); //conf_win_right_offset
        read_bits_ue(p); //conf_win_top_offset
        read_bits_ue(p); //conf_win_bottom_offset
    }
    bit_depth_luma_minus8 = read_bits_ue(p);
    bit_depth_chroma_minus8 = read_bits_ue(p);
    read_bits_ue(p); //log2_max_pic_order_cnt_lsb_minus4
    sps_sub_layer_ordering_info_present_flag = (uint8_t)read_bits(p, 1);

    //Without per-sub-layer ordering info only the highest sub-layer is signalled.
    first_signalled = sps_sub_layer_ordering_info_present_flag ? 0 : sps_max_sub_layers;
    for (int i = first_signalled; i <= sps_max_sub_layers; i++) {
        sps_max_dec_pic_buffering_minus1[i] = read_bits_ue(p);
        read_bits_ue(p); //sps_max_num_reorder_pics[i]
        read_bits_ue(p); //sps_max_latency_increase_plus1[i]
    }
    //Entries that were not signalled take the value of the highest sub-layer,
    //so index 0 printed below is always defined.
    for (int i = 0; i < first_signalled; i++)
        sps_max_dec_pic_buffering_minus1[i] = sps_max_dec_pic_buffering_minus1[sps_max_sub_layers];

    printf("general_profile_idc:%d\n", general_profile_idc);
    printf("general_level_idc:%d\n", general_level_idc);
    printf("chroma_format_idc:%d\n", chroma_format_idc);
    printf("pic_width_in_luma_samples:%d\n", pic_width_in_luma_samples);
    printf("pic_height_in_luma_samples:%d\n", pic_height_in_luma_samples);
    printf("bit_depth_luma_minus8:%d\n", bit_depth_luma_minus8);
    printf("bit_depth_chroma_minus8:%d\n", bit_depth_chroma_minus8);
    printf("sps_max_dec_pic_buffering_minus1[0]:%d\n", sps_max_dec_pic_buffering_minus1[0]);

    return 0;
}
257
/*
 * Strip emulation_prevention_three_byte (the 0x03 in 00 00 03) from a NAL
 * unit payload, in place.
 *
 * p       - payload buffer; the filtered bytes are written back to its start.
 *           Bytes beyond the filtered data keep whatever they held before.
 * nalulen - number of payload bytes in p.
 *
 * Returns 0 on success, -1 when p is NULL with a positive length, or when the
 * payload contains a sequence that is illegal inside a NAL unit:
 * 00 00 followed by 00/01/02, or 00 00 03 followed by a byte above 03.
 */
int hevc_filter_emulation_byte(uint8_t *p, int nalulen)
{
    int zero_run = 0; /* consecutive 0x00 bytes seen so far, never above 2 */
    int out = 0;      /* write index; always <= the read index */

    if (p == NULL && nalulen > 0)
        return -1;

    for (int in = 0; in < nalulen; in++) {
        if (zero_run == 2) {
            if (p[in] < 0x03) /* a start-code-like pattern must not appear inside a NAL unit */
                return -1;
            if (p[in] == 0x03) { /* emulation_prevention_three_byte */
                if (in == nalulen - 1) /* trailing 0x03: nothing follows, just drop it */
                    return 0;
                if (p[in + 1] > 0x03) /* 00 00 03 may only precede 00..03 */
                    return -1;
                in++; /* skip the 0x03 and copy the byte after it */
                zero_run = 0;
            }
        }
        p[out++] = p[in];
        zero_run = (p[in] == 0x00) ? zero_run + 1 : 0;
    }

    return 0;
}
281
#define READ_SIZE (1024*1024)

/*
 * Return the offset of the first 00 00 01 start code at or after `from`
 * within buf[0..len), or -1 when there is none.
 */
static int hevc_find_start_code(const uint8_t *buf, int len, int from)
{
    for (int i = from; i + 2 < len; i++) {
        if (buf[i] == 0x00 && buf[i + 1] == 0x00 && buf[i + 2] == 0x01)
            return i;
    }
    return -1;
}

/*
 * Usage: <program> <annex-b-hevc-file>
 *
 * Reads up to READ_SIZE bytes of the file, locates the first SPS NAL unit
 * (nal_unit_type 33), removes its emulation prevention bytes and prints the
 * parsed SPS fields. Returns EXIT_SUCCESS, or EXIT_FAILURE on any error.
 */
int main(int argc, char *argv[])
{
    enum { NAL_TYPE_SPS = 33, START_CODE_LEN = 3, NAL_HEADER_LEN = 2 };
    int status = EXIT_FAILURE;
    int sps_start = -1;
    int sps_end;
    int len;
    uint8_t *pSps;
    FILE *fp;

    if (argc < 2) {
        fprintf(stderr, "usage: %s <hevc-annexb-file>\n", argc > 0 ? argv[0] : "hevc_sps");
        return EXIT_FAILURE;
    }

    fp = fopen(argv[1], "rb"); /* binary mode: the stream is raw bytes */
    if (fp == NULL) {
        perror(argv[1]);
        return EXIT_FAILURE;
    }

    pSps = malloc(READ_SIZE);
    if (pSps == NULL) {
        fprintf(stderr, "out of memory\n");
        fclose(fp);
        return EXIT_FAILURE;
    }

    /* Only the bytes actually read may be scanned. */
    len = (int)fread(pSps, 1, READ_SIZE, fp);
    fclose(fp);

    /* Walk the start codes until one is followed by an SPS NAL header. */
    for (int pos = hevc_find_start_code(pSps, len, 0); pos >= 0;
         pos = hevc_find_start_code(pSps, len, pos + START_CODE_LEN)) {
        int payload = pos + START_CODE_LEN + NAL_HEADER_LEN;
        if (payload <= len && ((pSps[pos + START_CODE_LEN] >> 1) & 0x3f) == NAL_TYPE_SPS) {
            sps_start = payload;
            break;
        }
    }
    if (sps_start < 0) {
        fprintf(stderr, "no SPS NAL unit found in the first %d bytes\n", len);
        goto out;
    }

    /* The SPS runs up to the next start code, or to the end of the data read. */
    sps_end = hevc_find_start_code(pSps, len, sps_start);
    if (sps_end < 0)
        sps_end = len;

    /* The filter result is not inspected, as before; a malformed payload
     * simply yields meaningless field values below. */
    hevc_filter_emulation_byte(pSps + sps_start, sps_end - sps_start);
    if (hevc_analysis_sps(pSps + sps_start) == 0)
        status = EXIT_SUCCESS;

out:
    free(pSps);
    return status;
}
312
313
314
315
316
View Code
运行结果如下:
工具解析该段码流结果与代码输出结果一致:
profile_tier_level:
在上述代码中主要使用了u(n)和ue(v)解析方法,在后续文章中继续学习说明。
参考:
1、T-REC-H.265-202108-I!!PDF-E.pdf