emby 直通独显 emby amd显卡转码

转载

索姆拉 2024-08-07 21:32:12

文章标签 emby 直通独显 ffmpeg mjpeg转h264 #include ide 文章分类 机器学习人工智能

简单介绍下：公司am335x平台谈了一个安防方向的应用，基本功能差不多实现，客户提出在特定场景采集视频，然后转码为h264，通过局域网传输到服务器。采集视频采用uvc摄像头，采集格式支持mjpeg，yuv。考虑到两者采集文件都偏大，如果客户端较多，这样造成服务器端网络风暴，因此需要转码为h264.

yuv视频格式在相同条件下文件过大，另外还有一个很现实的问题（am335x平台usb dma存在bug，高速率传输会丢包，因此限定分辨率上限是320x240），因此确定采集mjpeg视频（这个问题很烦人，我还花了2周时间追usb、uvc、cppi41驱动代码，追ti官方usb的buglist。。）。查阅资料，最终确定方案为：采集到mjpeg视频文件，通过ffmpeg+x264转码，最终以h264形式保存。文件显著变小：5s 25fps 640x480分辨率的mjpeg视频文件大小为8.5M，转码后为390k。

下面分2方面介绍这里的工作：完成转码基本功能、转码优化。建议刚接触音视频转码的童鞋先了解一下ffmpeg编解码的基本流程，以及一些基本概念：封装格式、编码格式、未经过压缩的格式（RGB、YUV422P、YUV420P），以及它们之间的转换。

一、实现转码基本功能

1.移植ffmpeg+x264+yasm:

从官网上下载最新的源码，交叉编译，应该比较简单。这里只说明一下编译选项。

yasm：./configure --enable-shared --prefix=/usr/local/cross-ffmpeg --host=arm-linux CC=/opt/arm-2014.05/bin/arm-none-linux-gnueabi-gcc

x264：./configure --enable-shared --host=arm-linux --prefix=/usr/local/cross-ffmpeg --cross-prefix=/opt/arm-2014.05/bin/arm-none-linux-gnueabi- --disable-asm

ffmpeg：./configure --enable-cross-compile --arch=armv7 --target-os=linux --cross-prefix=/opt/arm-2014.05/bin/arm-none-linux-gnueabi- --enable-shared --disable-static --enable-gpl --enable-libx264 --prefix=/usr/local/cross-ffmpeg --extra-cflags=-I/usr/local/cross-ffmpeg/include --extra-ldflags=-L/usr/local/cross-ffmpeg/lib/ --extra-libs=-ldl

ffmpeg是转码工具，x264是h264格式的编码器，yasm是汇编器，用于汇编级别的优化。

2.转码有两种方式：直接调用ffmpeg，或者自己编写code。考虑到转码工具不够灵活、可操作性不好，决定使用code方式；并且mjpeg解码得到yuv422p，h264解码得到yuv420p，当时猜想工具无法完成中间格式的转换（这个猜想后来证明是错的，测试发现直接调用ffmpeg是可以的；但是代码中直接用yuv422p编码h264，转码后文件很大，不知道其中的差别在哪里）。

首先考虑直接在网上寻找成熟代码，然而真是没有。。查看ffmpeg 官方demo（/share/ffmpeg/example），使用decode_video.c 以及encode_video.c，但是怎么都运行不起来，总是报错退出（demo中是stream流形式，解码的是mpeg1，编码的源数据是自己构造的。。反正种种不一致，加上自己好多东西不了解）。后来参考了一篇博客（原文链接已丢失），完成了基本代码。

另外这里要说明的是：v4l2接口采集并保存的文件偏大，vim查看文件发现文件很大一部分为0，看了下采集的demo，发现v4l2接口的大小并不准确，改为通过寻找0xff 0xd9（jpeg结束码）获得数据长度，然后再保存。

二、优化

网络上优化的方法基本上是：编译时enable-yasm，enable-neon，自己实现yuv、rgb格式转换（官方提供的sws_scale效率低）或者在io操作优化。依次尝试后，发现有一定改善，不过cpu转码仍然需要转码好长时间，比如：5s 25fps 640x480分辨率文件大小为8.5M mjpeg视频，转码后390k，图像质量基本一致，转码时间2min50s。

针对这个现象，网络上的方案基本是在转码过程中加sleep来降低cpu占有率，也有通过cgroup进行资源分配的。我采用了降低转码进程优先级的方式，这样既能提高cpu对其他任务的响应，也能在空闲时最大化利用cpu。

另一个问题是ffmpeg转码占用了20%的内存。。。没有发现内存泄漏，也没有发现可以优化的部分，各位童鞋能给个建议吗？

附录：code

#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <sched.h>
#include <sys/resource.h>
#include <sys/time.h>
#include <sys/types.h>
#include <unistd.h>

#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavutil/channel_layout.h>
#include <libavutil/common.h>
#include <libavutil/imgutils.h>
#include <libavutil/mathematics.h>
#include <libavutil/opt.h>
#include <libavutil/samplefmt.h>
#include <libswscale/swscale.h>

#define uinit8_t unsigned char


static int  video_decode_example(const char *filename,const char *outfilename)
{
	av_log_set_level(AV_LOG_ERROR);  /* close part of ffmepg prints */
	/* register all the codecs */
	av_register_all();
	FILE* out = fopen(outfilename,"wb");
	
	AVFormatContext* pFormatCtx = NULL;
	//step 1:open file,get format info from file header
	if (avformat_open_input(&pFormatCtx, filename, NULL, NULL) != 0){
		fprintf(stderr,"avformat_open_input");
		return;
	}
	//step 2:get stread info
	if (avformat_find_stream_info(pFormatCtx, NULL) < 0){
		fprintf(stderr,"avformat_find_stream_info");
		return; 
	}
	//just output format info of input file
	av_dump_format(pFormatCtx, 0, filename, 0);
	int videoStream = -1;
	int i;
	//step 3:find vido stream
	for ( i = 0; i < pFormatCtx->nb_streams; i++)
	{
		if (pFormatCtx->streams[i]->codec->codec_type == AVMEDIA_TYPE_VIDEO)
		{
			videoStream = i;
			break;
		}
	}
	if (videoStream == -1){
		fprintf(stderr,"find video stream error");
		return;
	}
	AVCodecContext* pCodecCtxOrg = NULL;
	AVCodecContext* pCodecCtx = NULL;

	AVCodec* pCodec = NULL;

	AVCodec* enc = avcodec_find_encoder(AV_CODEC_ID_H264);
	AVCodecContext* enc_ctx = avcodec_alloc_context3(enc);

	pCodecCtxOrg = pFormatCtx->streams[videoStream]->codec; // codec context        
	//step 4:find  decoder
	pCodec = avcodec_find_decoder(pCodecCtxOrg->codec_id);

	if (!pCodec){
		fprintf(stderr,"avcodec_find_decoder error");
		return;
	}
	//step 5:get one instance of AVCodecContext,decode need it.
	pCodecCtx = avcodec_alloc_context3(pCodec);
	if (avcodec_copy_context(pCodecCtx, pCodecCtxOrg) != 0){
		fprintf(stderr,"avcodec_copy_context error");
		return;
	}
	//step 6: open codec
	if (avcodec_open2(pCodecCtx, pCodec, NULL) < 0){
		fprintf(stderr,"avcodec_open2 error");
		return;
	}
	AVFrame* pFrame = NULL;
	AVFrame* pFrameYUV = NULL;

	pFrame = av_frame_alloc();
	pFrameYUV = av_frame_alloc();

	int numBytes = 0;
	uint8_t* buffer = NULL;

 	enc_ctx->width = pCodecCtx->width;
	enc_ctx->height = pCodecCtx->height;
//	enc_ctx->bit_rate = 500000;
	enc_ctx->pix_fmt = AV_PIX_FMT_YUV420P;
	pCodecCtx->framerate = (AVRational){1,15};
	pCodecCtx->time_base = (AVRational){1,15};
	enc_ctx->time_base = pCodecCtx->time_base;
	enc_ctx->framerate = pCodecCtx->framerate;
	enc_ctx->gop_size = 12;
	enc_ctx->max_b_frames = 3;
	av_opt_set(enc_ctx->priv_data, "preset", "slow", 0);

	if (avcodec_open2(enc_ctx,enc,NULL)<0)
	{
		perror("open encodec");
		return -1;
	}

	buffer=(uint8_t *)av_malloc(avpicture_get_size(AV_PIX_FMT_YUV420P, pCodecCtx->width, pCodecCtx->height)*sizeof(uinit8_t)); 
	avpicture_fill((AVPicture *)pFrameYUV, buffer, AV_PIX_FMT_YUV420P, pCodecCtx->width, pCodecCtx->height);
	struct SwsContext* sws_ctx = NULL;

	AVPacket packet;
	AVPacket dst_packet;
	av_init_packet(&dst_packet);
	dst_packet.data = NULL;
	dst_packet.size = 0;
	int cnt0=0;
	int cnt1=0;
	 i = 0;
		int frameFinished = 0;
	//step 7:read frame
	while (av_read_frame(pFormatCtx, &packet) >= 0)
	{
		cnt0++;
		 frameFinished = 0;
		avcodec_decode_video2(pCodecCtx, pFrame, &frameFinished, &packet);
		if (frameFinished)
		{
	#if 0   // trancode between raw data(yuv420p yuv422p rgb and so on) 
			// using sws_ctx take more time,so do it ourself

			sws_ctx = sws_getContext(pCodecCtx->width, pCodecCtx->height, pCodecCtx->pix_fmt,
				pCodecCtx->width, pCodecCtx->height, AV_PIX_FMT_YUV420P, SWS_BILINEAR, NULL, NULL, NULL);
				//pCodecCtx->width, pCodecCtx->height, AV_PIX_FMT_YUV420P, SWS_BICUBIC, NULL, NULL, NULL);

			sws_scale(sws_ctx, (uint8_t const * const *)pFrame->data, pFrame->linesize, 0,
					pCodecCtx->height, pFrameYUV->data, pFrameYUV->linesize);
	#else
			memset(pFrameYUV->data[0],'\0',pCodecCtx->width*pCodecCtx->height);
			memset(pFrameYUV->data[1],'\0',pCodecCtx->width*pCodecCtx->height/4);
			memset(pFrameYUV->data[2],'\0',pCodecCtx->width*pCodecCtx->height/4);

			memcpy(pFrameYUV->data[0],pFrame->data[0],pCodecCtx->width*pCodecCtx->height);
			for(i=0;i<pCodecCtx->height;i++){
				if(i%2){
					memcpy(pFrameYUV->data[1]+i/2*pCodecCtx->width/2,pFrame->data[1]+i/2*2*pCodecCtx->width/2,pCodecCtx->width/2);
				}else{
					memcpy(pFrameYUV->data[2]+i/2*pCodecCtx->width/2,pFrame->data[2]+i/2*2*pCodecCtx->width/2,pCodecCtx->width/2);
				}	
			}
	#endif
			if(avcodec_encode_video2(enc_ctx,&dst_packet,pFrameYUV,&frameFinished)<0){
				perror("encode video ");
				return -1;
			}
			if(frameFinished)
			{
				cnt1++;
				int ret = fwrite(dst_packet.data,1,dst_packet.size, out);
				//fflush(out);
				dst_packet.data = NULL;
				dst_packet.size = 0;
			}
		}
	}
	/* get the delayed frames */
	for ( frameFinished= 1;frameFinished; i++) {
		//fflush(stdout);

			if(avcodec_encode_video2(enc_ctx,&dst_packet,NULL,&frameFinished)<0){
				perror("encode video ");
				return -1;
			}
			if(frameFinished)
			{
				cnt1++;
				int ret = fwrite(dst_packet.data,1,dst_packet.size, out);
				//fflush(out);
				dst_packet.data = NULL;
				dst_packet.size = 0;
			}
	}   
	//release resource
	av_free_packet(&packet);

	av_free(buffer);
	av_frame_free(&pFrameYUV);

	av_frame_free(&pFrame);

	avcodec_close(pCodecCtx);
	avcodec_close(pCodecCtxOrg);

	avformat_close_input(&pFormatCtx);
	printf("total=%d,ok=%d\n",cnt0,cnt1);
	fclose(out);
}

/*
 * Entry point: transcode argv[1] into an H.264 file at argv[2].
 *
 * The process priority is lowered to nice 19 before transcoding starts.
 *
 * Returns 0 on success, 1 on a usage error and -1 when lowering the priority
 * or the transcode itself fails.
 */
int main(int argc, char **argv)
{
	/* Both the input and the output path are required: argv[2] is read below. */
	if (argc < 3) {
		printf("usage: %s input_file output_file\n"
				"transcode video from mjpeg  to h264 to save memory\n"
				"example: ./transcode 3.avi out.h264\n",
				argv[0]);
		return 1;
	}

	/* set current process priority low to let the app run smooth */
	if (setpriority(PRIO_PROCESS,getpid(), 19) <0)
	{
		perror("fail to setpriority");
		return -1;
	}

	if(video_decode_example(argv[1], argv[2])<0){
		printf("transcode fail\n");
		return -1;
	}

	return 0;
}

本文章为转载内容，我们尊重原作者对文章享有的著作权。如有内容错误或侵权问题，欢迎原作者联系我们进行内容更正或删除文章。