FFMPEG 时间戳PTS校正

前言

    FFmpeg 对时间戳(PTS)的校正主要借助 DTS 完成,具体实现在 compute_pkt_fields 函数中。本文写成已久,一直没有时间细致整理,先发出来供有需要的读者参考。

代码

/**
 * Fill in missing or implausible pts/dts/duration fields of a demuxed packet.
 *
 * The correction is driven mainly by the dts: per-stream state kept in the
 * AVStream (cur_dts, last_IP_pts, last_IP_duration, pts_buffer, the
 * dts_ordered/dts_misordered counters, ...) is read and updated on every call.
 *
 * @param s        format context; its flags and debug mask are consulted
 * @param st       stream the packet belongs to; its timestamp state is updated
 * @param pc       parser context of the stream, may be NULL
 * @param pkt      packet whose pts, dts, duration and flags are adjusted in place
 * @param next_dts dts supplied by the caller (presumably that of the upcoming
 *                 packet - confirm against the caller); used to derive a missing pts
 * @param next_pts pts counterpart of next_dts
 */
static void compute_pkt_fields(AVFormatContext *s, AVStream *st,
                                AVCodecParserContext *pc, AVPacket *pkt,
                                int64_t next_dts, int64_t next_pts)
 {
     int num, den, presentation_delayed, delay, i;
     int64_t offset;
     AVRational duration;
     /* Everything except H.264 and HEVC is handled by the interpolation below. */
     int onein_oneout = st->codecpar->codec_id != AV_CODEC_ID_H264 &&
                        st->codecpar->codec_id != AV_CODEC_ID_HEVC;

     /* AVFMT_FLAG_NOFILLIN is set on the AVFormatContext: do no timestamp
      * processing at all and keep the packet fields exactly as they are. */
     if (s->flags & AVFMT_FLAG_NOFILLIN)
         return;

     /* Video packet that carries a dts: keep statistics on dts ordering. */
     if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO && pkt->dts != AV_NOPTS_VALUE) {
         /* dts equals pts and an earlier dts has already been recorded. */
         if (pkt->dts == pkt->pts && st->last_dts_for_order_check != AV_NOPTS_VALUE) {
             if (st->last_dts_for_order_check <= pkt->dts) {
                 /* Not smaller than the previous dts: count as correctly ordered. */
                 st->dts_ordered++;
             } else {
                 /* Smaller than the previous dts: count as misordered. */
                 st->dts_misordered++;
             }
             /* Halve both counters once their sum exceeds 250 so that they
              * stay bounded. */
             if (st->dts_ordered + st->dts_misordered > 250) {
                 st->dts_ordered    >>= 1;
                 st->dts_misordered >>= 1;
             }
         }

         /* Remember the current dts as the reference for the next order check. */
         st->last_dts_for_order_check = pkt->dts;
         /* Too few well-ordered packets (fewer than 8 per misordered one) and
          * dts == pts: the dts is considered unreliable and is discarded. */
         if (st->dts_ordered < 8*st->dts_misordered && pkt->dts == pkt->pts)
             pkt->dts = AV_NOPTS_VALUE;
     }

     /* AVFMT_FLAG_IGNDTS: drop the dts whenever a pts is available. */
     if ((s->flags & AVFMT_FLAG_IGNDTS) && pkt->pts != AV_NOPTS_VALUE)
         pkt->dts = AV_NOPTS_VALUE;

     /* The parser reported a B-frame but has_b_frames was still 0: set it. */
     if (pc && pc->pict_type == AV_PICTURE_TYPE_B
         && !st->internal->avctx->has_b_frames)
         //FIXME Set low_delay = 0 when has_b_frames = 1
         st->internal->avctx->has_b_frames = 1;

     /* do we have a video B-frame ? */
     delay = st->internal->avctx->has_b_frames;
     presentation_delayed = 0;

     /* B-frames exist in the stream and this packet is not a B-frame:
      * its presentation is delayed relative to decoding. */
     if (delay &&
         pc && pc->pict_type != AV_PICTURE_TYPE_B)
         presentation_delayed = 1;

     /* pts_wrap_bits is the number of bits used for timestamps in the stream
      * (e.g. 33 for MPEG-TS). "Wrap" means the counter restarts from the
      * beginning after exceeding its maximum (wrap-around).
      *
      * Both pts and dts are set but dts is more than half the wrap range
      * ahead of pts. Requiring pts_wrap_bits < 63 keeps the shifts below from
      * overflowing. */
     if (pkt->pts != AV_NOPTS_VALUE && pkt->dts != AV_NOPTS_VALUE &&
         st->pts_wrap_bits < 63 &&
         pkt->dts - (1LL << (st->pts_wrap_bits - 1)) > pkt->pts) {
         /* cur_dts is a relative value (presumably: above RELATIVE_TS_BASE -
          * see is_relative()) or dts is also far ahead of cur_dts: pull the
          * dts back by one wrap period; otherwise push the pts forward. */
         if (is_relative(st->cur_dts) || pkt->dts - (1LL<<(st->pts_wrap_bits - 1)) > st->cur_dts) {
             pkt->dts -= 1LL << st->pts_wrap_bits;
         } else
             pkt->pts += 1LL << st->pts_wrap_bits;
     }

     /* Some MPEG-2 in MPEG-PS lack dts (issue #171 / input_file.mpg).
      * We take the conservative approach and discard both.
      * Note: If this is misbehaving for an H.264 file, then possibly
      * presentation_delayed is not set correctly. */
     if (delay == 1 && pkt->dts == pkt->pts &&
         pkt->dts != AV_NOPTS_VALUE && presentation_delayed) {
         av_log(s, AV_LOG_DEBUG, "invalid dts/pts combination %"PRIi64"\n", pkt->dts);
         if (    strcmp(s->iformat->name, "mov,mp4,m4a,3gp,3g2,mj2")
              && strcmp(s->iformat->name, "flv")) // otherwise we discard correct timestamps for vc1-wmapro.ism
             pkt->dts = AV_NOPTS_VALUE;
     }

     /* Packet duration as a rational: pkt->duration * time_base. */
     duration = av_mul_q((AVRational) {pkt->duration, 1}, st->time_base);
     if (pkt->duration == 0) {
         /* No duration in the packet: derive one from the frame rate. */
         ff_compute_frame_duration(s, &num, &den, st, pc, pkt);
         if (den && num) {
             duration = (AVRational) {num, den};
             pkt->duration = av_rescale_rnd(1,
                                            num * (int64_t) st->time_base.den,
                                            den * (int64_t) st->time_base.num,
                                            AV_ROUND_DOWN);
         }
     }

     /* A duration is known and packets are still queued internally: let
      * update_initial_durations() correct the queued packets. */
     if (pkt->duration != 0 && (s->internal->packet_buffer || s->internal->parse_queue))
         update_initial_durations(s, st, pkt->stream_index, pkt->duration);

     /* Correct timestamps with byte offset if demuxers only have timestamps
      * on packet boundaries */
     if (pc && st->need_parsing == AVSTREAM_PARSE_TIMESTAMPS && pkt->size) {
         /* this will estimate bitrate based on this frame's duration and size */
         offset = av_rescale(pc->offset, pkt->duration, pkt->size);
         if (pkt->pts != AV_NOPTS_VALUE)
             pkt->pts += offset;
         if (pkt->dts != AV_NOPTS_VALUE)
             pkt->dts += offset;
     }

     /* This may be redundant, but it should not hurt. */
     if (pkt->dts != AV_NOPTS_VALUE &&
         pkt->pts != AV_NOPTS_VALUE &&
         pkt->pts > pkt->dts)
         presentation_delayed = 1;

     if (s->debug & FF_FDEBUG_TS)
         av_log(s, AV_LOG_DEBUG,
             "IN delayed:%d pts:%s, dts:%s cur_dts:%s st:%d pc:%p duration:%"PRId64" delay:%d onein_oneout:%d\n",
             presentation_delayed, av_ts2str(pkt->pts), av_ts2str(pkt->dts), av_ts2str(st->cur_dts),
             pkt->stream_index, pc, pkt->duration, delay, onein_oneout);

     /* Interpolate PTS and DTS if they are not present. We skip H264
      * currently because delay and has_b_frames are not reliably set
      * (i.e. this part only runs for streams that are neither H.264 nor HEVC).
      *
      * This is the core of the PTS correction. In the delayed branch
      * (I/P frames of a stream with B-frames) a missing pts is only filled
      * in when next_dts/next_pts allow it; otherwise it is left as it is.
      * In the non-delayed branch (e.g. B-frames) a missing pts is taken from
      * the packet's dts or, failing that, from the running st->cur_dts that
      * was computed from the previous packets. */
     if ((delay == 0 || (delay == 1 && pc)) &&
         onein_oneout) {
         if (presentation_delayed) {
             /* DTS = decompression timestamp */
             /* PTS = presentation timestamp */
             if (pkt->dts == AV_NOPTS_VALUE)
                 pkt->dts = st->last_IP_pts;
             /* Update the stream's initial timestamps. */
             update_initial_timestamps(s, pkt->stream_index, pkt->dts, pkt->pts, pkt);
             if (pkt->dts == AV_NOPTS_VALUE)
                 pkt->dts = st->cur_dts;

             /* This is tricky: the dts must be incremented by the duration
              * of the frame we are displaying, i.e. the last I- or P-frame. */
             if (st->last_IP_duration == 0 && (uint64_t)pkt->duration <= INT32_MAX)
                 st->last_IP_duration = pkt->duration;
             if (pkt->dts != AV_NOPTS_VALUE)
                 st->cur_dts = pkt->dts + st->last_IP_duration;
             /* pts is missing but dts is set, an I/P duration is known,
              * cur_dts is within 1 of next_dts, and next_pts is set and
              * differs from next_dts: use next_dts as the pts. */
             if (pkt->dts != AV_NOPTS_VALUE &&
                 pkt->pts == AV_NOPTS_VALUE &&
                 st->last_IP_duration > 0 &&
                 ((uint64_t)st->cur_dts - (uint64_t)next_dts + 1) <= 2 &&
                 next_dts != next_pts &&
                 next_pts != AV_NOPTS_VALUE)
                 pkt->pts = next_dts;

             if ((uint64_t)pkt->duration <= INT32_MAX)
                 st->last_IP_duration = pkt->duration;
             st->last_IP_pts      = pkt->pts;
             /* Cannot compute PTS if not present (we can compute it only
              * by knowing the future. */
         } else if (pkt->pts != AV_NOPTS_VALUE ||
                    pkt->dts != AV_NOPTS_VALUE ||
                    pkt->duration                ) {

             /* presentation is not delayed : PTS and DTS are the same */
             if (pkt->pts == AV_NOPTS_VALUE)
                 pkt->pts = pkt->dts;
             update_initial_timestamps(s, pkt->stream_index, pkt->pts,
                                       pkt->pts, pkt);
             if (pkt->pts == AV_NOPTS_VALUE)
                 pkt->pts = st->cur_dts;
             pkt->dts = pkt->pts;
             /* Next expected dts: this pts advanced by the packet duration. */
             if (pkt->pts != AV_NOPTS_VALUE)
                 st->cur_dts = av_add_stable(st->time_base, pkt->pts, duration, 1);
         }
     }

     /* Insert the pts into the sorted reorder buffer; once the decode delay
      * has been guessed, pick the dts from that buffer. */
     if (pkt->pts != AV_NOPTS_VALUE && delay <= MAX_REORDER_DELAY) {
         st->pts_buffer[0] = pkt->pts;
         for (i = 0; i<delay && st->pts_buffer[i] > st->pts_buffer[i + 1]; i++)
             FFSWAP(int64_t, st->pts_buffer[i], st->pts_buffer[i + 1]);

         if(has_decode_delay_been_guessed(st))
             pkt->dts = select_from_pts_buffer(st, st->pts_buffer, pkt->dts);
     }
     // We skipped it above so we try here.
     if (!onein_oneout)
         // This should happen on the first packet
         update_initial_timestamps(s, pkt->stream_index, pkt->dts, pkt->pts, pkt);
     if (pkt->dts > st->cur_dts)
         st->cur_dts = pkt->dts;

     if (s->debug & FF_FDEBUG_TS)
         av_log(s, AV_LOG_DEBUG, "OUTdelayed:%d/%d pts:%s, dts:%s cur_dts:%s st:%d (%d)\n",
             presentation_delayed, delay, av_ts2str(pkt->pts), av_ts2str(pkt->dts), av_ts2str(st->cur_dts), st->index, st->id);

     /* update flags */
     if (st->codecpar->codec_type == AVMEDIA_TYPE_DATA || is_intra_only(st->codecpar->codec_id))
         pkt->flags |= AV_PKT_FLAG_KEY;
 #if FF_API_CONVERGENCE_DURATION
 FF_DISABLE_DEPRECATION_WARNINGS
     if (pc)
         pkt->convergence_duration = pc->convergence_duration;
 FF_ENABLE_DEPRECATION_WARNINGS
 #endif
 }

 /**
  * Compute the frame duration in seconds as the fraction *pnum / *pden.
  * Both are set to 0 if the duration is not available.
  */
 void ff_compute_frame_duration(AVFormatContext *s, int *pnum, int *pden, AVStream *st,
                                AVCodecParserContext *pc, AVPacket *pkt)
 {
     /* Writes the duration of one frame to *pnum / *pden; both stay 0 when no
      * duration can be determined. With an input format the codec context's
      * framerate is used; otherwise the frame rate is derived from the codec
      * time base and ticks_per_frame. */
     AVRational codec_framerate = s->iformat ? st->internal->avctx->framerate :
                                               av_mul_q(av_inv_q(st->internal->avctx->time_base), (AVRational){1, st->internal->avctx->ticks_per_frame});
     int frame_size, sample_rate;

 #if FF_API_LAVF_AVCTX
 FF_DISABLE_DEPRECATION_WARNINGS
     /* No usable frame rate yet: fall back to the deprecated st->codec fields. */
     if ((!codec_framerate.den || !codec_framerate.num) && st->codec->time_base.den && st->codec->time_base.num)
         codec_framerate = av_mul_q(av_inv_q(st->codec->time_base), (AVRational){1, st->codec->ticks_per_frame});
 FF_ENABLE_DEPRECATION_WARNINGS
 #endif

     *pnum = 0;
     *pden = 0;
     switch (st->codecpar->codec_type) {
     case AVMEDIA_TYPE_VIDEO:
         if (st->r_frame_rate.num && !pc && s->iformat) {
             /* Demuxing without a parser: duration is the inverse of r_frame_rate. */
             *pnum = st->r_frame_rate.den;
             *pden = st->r_frame_rate.num;
         } else if (st->time_base.num * 1000LL > st->time_base.den) {
             /* Stream time base is coarser than 1/1000 s: use it as the duration. */
             *pnum = st->time_base.num;
             *pden = st->time_base.den;
         } else if (codec_framerate.den * 1000LL > codec_framerate.num) {
             /* Codec frame rate is below 1000 fps: derive the duration from it. */
             av_assert0(st->internal->avctx->ticks_per_frame);
             av_reduce(pnum, pden,
                       codec_framerate.den,
                       codec_framerate.num * (int64_t)st->internal->avctx->ticks_per_frame,
                       INT_MAX);

             /* The parser reports repeated pictures: scale by 1 + repeat_pict. */
             if (pc && pc->repeat_pict) {
                 av_assert0(s->iformat); // this may be wrong for interlaced encoding but its not used for that case
                 av_reduce(pnum, pden,
                           (*pnum) * (1LL + pc->repeat_pict),
                           (*pden),
                           INT_MAX);
             }
             /* If this codec can be interlaced or progressive then we need
              * a parser to compute duration of a packet. Thus if we have
              * no parser in such case leave duration undefined. */
             if (st->internal->avctx->ticks_per_frame > 1 && !pc)
                 *pnum = *pden = 0;
         }
         break;
     case AVMEDIA_TYPE_AUDIO:
         /* Audio: duration is frame_size / sample_rate, taken from the internal
          * codec context when it has been initialised, else from codecpar. */
         if (st->internal->avctx_inited) {
             frame_size = av_get_audio_frame_duration(st->internal->avctx, pkt->size);
             sample_rate = st->internal->avctx->sample_rate;
         } else {
             frame_size = av_get_audio_frame_duration2(st->codecpar, pkt->size);
             sample_rate = st->codecpar->sample_rate;
         }
         if (frame_size <= 0 || sample_rate <= 0)
             break;
         *pnum = frame_size;
         *pden = sample_rate;
         break;
     default:
         break;
     }
 }

 /**
  * Assign dts (and, without B-frames, pts) and duration to queued packets of
  * one stream that carry no timing information yet.
  *
  * @param s            format context owning packet_buffer / parse_queue
  * @param st           stream whose first_dts / cur_dts are read and updated
  * @param stream_index index of the stream whose queued packets are patched
  * @param duration     duration to assign to each packet lacking one; 64-bit
  *                     so that a packet's int64_t duration is not truncated
  */
 static void update_initial_durations(AVFormatContext *s, AVStream *st,
                                      int stream_index, int64_t duration)
 {
     AVPacketList *pktl = s->internal->packet_buffer ? s->internal->packet_buffer : s->internal->parse_queue;
     int64_t cur_dts    = RELATIVE_TS_BASE;

     /* first_dts is set by update_initial_timestamps(); it can be read as the
      * dts of the first packet. */
     if (st->first_dts != AV_NOPTS_VALUE) {
         /* This back-dating step runs at most once per stream. */
         if (st->update_initial_durations_done)
             return;
         st->update_initial_durations_done = 1;
         cur_dts = st->first_dts;
         for (; pktl; pktl = get_next_pkt(s, st, pktl)) {
             if (pktl->pkt.stream_index == stream_index) {
                 /* Stop at the first packet carrying any timing information
                  * (pts != dts, a valid dts, or a duration); for every
                  * untimed packet before it, step back by one duration. */
                 if (pktl->pkt.pts != pktl->pkt.dts  ||
                     pktl->pkt.dts != AV_NOPTS_VALUE ||
                     pktl->pkt.duration)
                     break;
                 cur_dts -= duration;
             }
         }
         /* The packet found does not carry first_dts; the queue may already
          * have been partly consumed. */
         if (pktl && pktl->pkt.dts != st->first_dts) {
             av_log(s, AV_LOG_DEBUG, "first_dts %s not matching first dts %s (pts %s, duration %"PRId64") in the queue\n",
                    av_ts2str(st->first_dts), av_ts2str(pktl->pkt.dts), av_ts2str(pktl->pkt.pts), pktl->pkt.duration);
             return;
         }
         if (!pktl) {
             av_log(s, AV_LOG_DEBUG, "first_dts %s but no packet with dts in the queue\n", av_ts2str(st->first_dts));
             return;
         }
         /* Rewind to the head of the queue. If cur_dts now differs from
          * first_dts, the queue held packets without a valid dts before the
          * first timed one. */
         pktl          = s->internal->packet_buffer ? s->internal->packet_buffer : s->internal->parse_queue;
         st->first_dts = cur_dts;
     } else if (st->cur_dts != RELATIVE_TS_BASE)
         return;

     for (; pktl; pktl = get_next_pkt(s, st, pktl)) {
         if (pktl->pkt.stream_index != stream_index)
             continue;
         /* Patch the packet only if all of the following hold:
          * 1. pts equals dts, or pts is unset;
          * 2. dts is unset, equals first_dts, or equals RELATIVE_TS_BASE;
          * 3. it has no duration.
          * Otherwise stop. */
         if ((pktl->pkt.pts == pktl->pkt.dts ||
              pktl->pkt.pts == AV_NOPTS_VALUE) &&
             (pktl->pkt.dts == AV_NOPTS_VALUE ||
              pktl->pkt.dts == st->first_dts ||
              pktl->pkt.dts == RELATIVE_TS_BASE) &&
             !pktl->pkt.duration) {
             pktl->pkt.dts = cur_dts;
             /* Without B-frames the pts equals the dts. */
             if (!st->internal->avctx->has_b_frames)
                 pktl->pkt.pts = cur_dts;
             pktl->pkt.duration = duration;
         } else
             break;
         cur_dts = pktl->pkt.dts + pktl->pkt.duration;
     }
     /* Whole queue processed without stopping: publish the running dts. */
     if (!pktl)
         st->cur_dts = cur_dts;
 }

 /**
  * Choose a dts for the current packet from the sorted pts reorder buffer.
  *
  * @param st         stream; for H.264/HEVC its pts_reorder_error statistics
  *                   are read or updated
  * @param pts_buffer reorder buffer of recent pts values
  * @param dts        dts of the packet, or AV_NOPTS_VALUE if unknown
  * @return the given dts if it was set, otherwise a value from pts_buffer
  */
 static int64_t select_from_pts_buffer(AVStream *st, int64_t *pts_buffer, int64_t dts) {
     int onein_oneout = st->codecpar->codec_id != AV_CODEC_ID_H264 &&
                        st->codecpar->codec_id != AV_CODEC_ID_HEVC;

     /* H.264 or HEVC stream */
     if(!onein_oneout) {
         int delay = st->internal->avctx->has_b_frames;
         int i;

         if (dts == AV_NOPTS_VALUE) {
             /* dts unknown: take the buffer slot with the lowest average
              * error accumulated so far. */
             int64_t best_score = INT64_MAX;
             for (i = 0; i<delay; i++) {
                 if (st->pts_reorder_error_count[i]) {
                     int64_t score = st->pts_reorder_error[i] / st->pts_reorder_error_count[i];
                     if (score < best_score) {
                         best_score = score;
                         dts = pts_buffer[i];
                     }
                 }
             }
         } else {
             /* dts known: accumulate, per slot, how far the slot's pts is
              * from the real dts. */
             for (i = 0; i<delay; i++) {
                 if (pts_buffer[i] != AV_NOPTS_VALUE) {
                     int64_t diff =  FFABS(pts_buffer[i] - dts)
                                     + (uint64_t)st->pts_reorder_error[i];
                     /* Never let the accumulated error decrease. */
                     diff = FFMAX(diff, st->pts_reorder_error[i]);
                     st->pts_reorder_error[i] = diff;
                     st->pts_reorder_error_count[i]++;
                     /* Halve error and count once the count exceeds 250. */
                     if (st->pts_reorder_error_count[i] > 250) {
                         st->pts_reorder_error[i] >>= 1;
                         st->pts_reorder_error_count[i] >>= 1;
                     }
                 }
             }
         }
     }

     /* Still no dts: fall back to the first buffer entry. */
     if (dts == AV_NOPTS_VALUE)
         dts = pts_buffer[0];

     return dts;
 }